Author

Dylan Li

Libraries

Code
library(tidyverse)
library(tidymodels)
library(glmnet)
library(discrim)
library(rpart)
library(rpart.plot)
library(baguette)
library(tidyclust)
library(caret)
library(RColorBrewer)

Grid Setup

Code
sample_grid <- matrix(c("Bear", "Bee", "Meadow", "Bear", "Meadow", "Meadow", "Bee", "Meadow", "Bee"),3,3,byrow=TRUE)
sample_grid
     [,1]   [,2]     [,3]    
[1,] "Bear" "Bee"    "Meadow"
[2,] "Bear" "Meadow" "Meadow"
[3,] "Bee"  "Meadow" "Bee"   
Code
sample_grid2 <- matrix(c("Meadow", "Meadow", "Bee", "Meadow", "Bee", "Meadow", "Bee", "Meadow", "Meadow"),3,3,byrow=TRUE)
sample_grid2
     [,1]     [,2]     [,3]    
[1,] "Meadow" "Meadow" "Bee"   
[2,] "Meadow" "Bee"    "Meadow"
[3,] "Bee"    "Meadow" "Meadow"
Code
big_grid1 <- matrix(c("Deer", "Meadow", "Bee", "Bear", "Fox", "Wolf", "Meadow", "Meadow", "Trout", "Stream",
                      "Deer", "Eagle", "Meadow", "Trout", "Stream", "Fox", "Rabbit", "Stream", "Dragonfly",
                      "Stream"),4,5,byrow=TRUE)
big_grid1
     [,1]   [,2]     [,3]     [,4]        [,5]    
[1,] "Deer" "Meadow" "Bee"    "Bear"      "Fox"   
[2,] "Wolf" "Meadow" "Meadow" "Trout"     "Stream"
[3,] "Deer" "Eagle"  "Meadow" "Trout"     "Stream"
[4,] "Fox"  "Rabbit" "Stream" "Dragonfly" "Stream"
Code
twenty_seven_x <- rep("x", 27)

super_vec <- append(twenty_seven_x, 
                      c("x", "x", 
                        "Deer", "Meadow", "Bee", "Bear", "Fox", 
                        "x", "x", 
                        "x", "x", 
                        "Wolf", "Meadow", "Meadow", "Trout", "Stream",
                        "x", "x", 
                        "x", "x", 
                        "Deer", "Eagle", "Meadow", "Trout", "Stream", 
                        "x", "x", 
                        "x", "x", 
                        "Fox", "Rabbit", "Stream", "Dragonfly","Stream",
                        "x", "x"))

super_grid1 <- matrix(super_vec,7,9,byrow=TRUE)
super_grid1
     [,1] [,2] [,3]   [,4]     [,5]     [,6]        [,7]     [,8] [,9]
[1,] "x"  "x"  "x"    "x"      "x"      "x"         "x"      "x"  "x" 
[2,] "x"  "x"  "x"    "x"      "x"      "x"         "x"      "x"  "x" 
[3,] "x"  "x"  "x"    "x"      "x"      "x"         "x"      "x"  "x" 
[4,] "x"  "x"  "Deer" "Meadow" "Bee"    "Bear"      "Fox"    "x"  "x" 
[5,] "x"  "x"  "Wolf" "Meadow" "Meadow" "Trout"     "Stream" "x"  "x" 
[6,] "x"  "x"  "Deer" "Eagle"  "Meadow" "Trout"     "Stream" "x"  "x" 
[7,] "x"  "x"  "Fox"  "Rabbit" "Stream" "Dragonfly" "Stream" "x"  "x" 

Board Generation

Code
cards <- c(rep("Bear", 12), 
           rep("Bee", 8), 
           rep("Meadow", 20),
           rep("Trout", 10),
           rep("Eagle", 8),
           rep("Rabbit", 8),
           rep("Dragonfly", 8),
           rep("Fox", 12),
           rep("Deer", 12),
           rep("Stream", 20),
           rep("Wolf", 12)
           )

generate_grid <- function(pool, partial_grid = NULL){
  if(is.null(partial_grid) == FALSE){
    blanks <- which(partial_grid == "x", TRUE)
    n = 20 - nrow(blanks)
    
  }else{
    n = 20
  }
  
  sample <- sample(pool, n)
  
  if(is.null(partial_grid) == FALSE){
    
    board <- partial_grid
    
    for (i in 1:nrow(blanks)){
      loc <- c(blanks[[i, 1]], blanks[[i, 2]])
      board[blanks[[i, 1]], blanks[[i, 2]]] = sample[i]
    }
    
  }else{
    board <- matrix(sample, nrow=4, ncol=5, byrow=TRUE)
  }
  
  return(board)
}

generate_grid(cards)
     [,1]    [,2]     [,3]        [,4]     [,5]  
[1,] "Eagle" "Bear"   "Dragonfly" "Stream" "Bear"
[2,] "Eagle" "Stream" "Meadow"    "Wolf"   "Wolf"
[3,] "Bee"   "Eagle"  "Eagle"     "Fox"    "Bear"
[4,] "Wolf"  "Deer"   "Eagle"     "Stream" "Deer"

Helper functions

Code
find_cardinals <- function(i, j, grid){
  cardinals <- list()
  maxrow = nrow(grid)
  maxcol = ncol(grid)
      
  if(i+1 <= maxrow){
    cardinals[[length(cardinals)+1]] <- c(i+1,j)
  }
  
  if(i-1 > 0){
    cardinals[[length(cardinals)+1]] <- c(i-1,j)
  }
  
  if(j+1 <= maxcol){
    cardinals[[length(cardinals)+1]] <- c(i,j+1)
  }
  
  if(j-1 > 0){
    cardinals[[length(cardinals)+1]] <- c(i,j-1)
  }
  
  return(cardinals)
}
Code
find_two_spaces <- function(i, j, grid){
  two_space <- list()
  maxrow = nrow(grid)
  maxcol = ncol(grid)
      
  if(i+1 <= maxrow){
    two_space[[length(two_space)+1]] <- c(i+1,j)
  }
  
  if(i+2 <= maxrow){
    two_space[[length(two_space)+1]] <- c(i+2,j)
  }
  
  if(i-1 > 0){
    two_space[[length(two_space)+1]] <- c(i-1,j)
  }
  
  if(i-2 > 0){
    two_space[[length(two_space)+1]] <- c(i-2,j)
  }
  
  if(j+1 <= maxcol){
    two_space[[length(two_space)+1]] <- c(i,j+1)
  }
  
  if(j+2 <= maxcol){
    two_space[[length(two_space)+1]] <- c(i,j+2)
  }
  
  if(j-1 > 0){
    two_space[[length(two_space)+1]] <- c(i,j-1)
  }
  
  if(j-2 > 0){
    two_space[[length(two_space)+1]] <- c(i,j-2)
  }
  
  if(i+1 <= maxrow && j+1 <= maxcol){
    two_space[[length(two_space)+1]] <- c(i+1,j+1)
  }
  
  if(i+1 <= maxrow && j-1 > 0){
    two_space[[length(two_space)+1]] <- c(i+1,j-1)
  }
  
  if(i-1 > 0 && j+1 <= maxcol){
    two_space[[length(two_space)+1]] <- c(i-1,j+1)
  }
  
  if(i-1 > 0 && j-1 > 0){
    two_space[[length(two_space)+1]] <- c(i-1,j-1)
  }
  
  return(two_space)
}
Code
find_more_meadows <- function(i, j, grid, meadow_list){
  meadow_list[[length(meadow_list)+1]] = as.double(c(i,j))
  current_caridnals <- find_cardinals(i, j, grid)
  for (k in current_caridnals){
    if (grid[k[1],k[2]] == "Meadow"){
      if ((list(k) %in% meadow_list) == FALSE){
        meadow_list = find_more_meadows(as.double(k[1]),as.double(k[2]), grid, meadow_list)
      }
    }
  }
  return(meadow_list)
  
}
Code
find_more_streams <- function(i, j, grid, stream_list){
  stream_list[[length(stream_list)+1]] = as.double(c(i,j))
  current_caridnals <- find_cardinals(i, j, grid)
  for (k in current_caridnals){
    if (grid[k[1],k[2]] == "Stream"){
      if ((list(k) %in% stream_list) == FALSE){
        stream_list = find_more_streams(as.double(k[1]),as.double(k[2]), grid, stream_list)
      }
    }
  }
  return(stream_list)
  
}

Scoring function

Code
score_grid <- function(grid, individual=FALSE){
  
  score = 0
  
  meadow_patch = list()
  first_meadow = TRUE
  stream_patch = list()
  first_stream = TRUE
  dragonfly_list = list()
  deer_row <- c()
  deer_col <- c()
  # first_wolf = TRUE
  num_wolves = 0
  
  
  bear_score = 0
  bee_score = 0
  meadow_score = 0
  trout_score = 0
  eagle_score = 0
  rabbit_score = 0
  dragonfly_score = 0
  fox_score = 0
  deer_score = 0
  stream_score = 0
  wolf_score = 0
  diversity_score = 0
  
  for (i in 1:nrow(grid)) {
    for (j in 1:ncol(grid)) {
      
      current_caridnals <- find_cardinals(i, j, grid)
      
      
      if(grid[i,j] == "Bear"){
        for (k in current_caridnals){
          if (grid[k[1],k[2]] == "Bee" || grid[k[1],k[2]] == "Trout"){
            score = score + 2
            bear_score = bear_score + 2
          }
        }
      }
      
      if(grid[i,j] == "Bee"){
        for (k in current_caridnals){
          if (grid[k[1],k[2]] == "Meadow"){
            score = score + 3
            bee_score = bee_score + 3
          }
        }
      }
      
      if(grid[i,j] == "Meadow"){
        if (first_meadow == TRUE){
          
          first_meadow = FALSE
          first_patch = list()
          completed_patch = find_more_meadows(as.double(i), as.double(j), grid, first_patch)
          meadow_patch[[length(meadow_patch)+1]] = completed_patch
          
        }else{
          
          exist = FALSE
          
          for (x in 1:length(meadow_patch)){
            if (list(as.double(c(i,j))) %in% meadow_patch[[x]]){
              exist = TRUE
            }
          }
          
          if (exist == FALSE){
            new_patch = list()
            completed_patch = find_more_meadows(as.double(i), as.double(j), grid, new_patch)
            meadow_patch[[length(meadow_patch)+1]] = completed_patch
          }
          
        }
      }
      
      if(grid[i,j] == "Trout"){
        for (k in current_caridnals){
          if (grid[k[1],k[2]] == "Dragonfly" || grid[k[1],k[2]] == "Stream"){
            score = score + 2
            trout_score = trout_score + 2
          }
        }
      }
      
      if(grid[i,j] == "Eagle"){
        
        two_space <- find_two_spaces(i, j, grid)
        
        for (k in two_space){
          if (grid[k[1],k[2]] == "Trout" || grid[k[1],k[2]] == "Rabbit"){
            score = score + 2
            eagle_score = eagle_score + 2
          }
        }
      }
      
      if(grid[i,j] == "Rabbit"){
        score = score + 1
        rabbit_score = rabbit_score + 1
      }
      
      if(grid[i,j] == "Dragonfly"){
        dragonfly_list[[length(dragonfly_list)+1]] = as.double(c(i,j))
      }
      
      if(grid[i,j] == "Fox"){
        score_it = TRUE
        
        for (k in current_caridnals){
          if (grid[k[1],k[2]] == "Bear" || grid[k[1],k[2]] == "Wolf"){
            score_it = FALSE
          }
        }
        
        if (score_it){
          score = score + 3
          fox_score = fox_score + 3
        }
      }
      
      if(grid[i,j] == "Deer"){
        deer_row <- append(deer_row, i)
        deer_col <- append(deer_col, j)
      }
      
      if(grid[i,j] == "Stream"){
        if (first_stream == TRUE){
          
          first_stream = FALSE
          first_patch = list()
          completed_patch = find_more_streams(as.double(i), as.double(j), grid, first_patch)
          stream_patch[[length(stream_patch)+1]] = completed_patch
          
        }else{
          
          exist = FALSE
          
          for (x in 1:length(stream_patch)){
            if (list(as.double(c(i,j))) %in% stream_patch[[x]]){
              exist = TRUE
            }
          }
          
          if (exist == FALSE){
            new_patch = list()
            completed_patch = find_more_streams(as.double(i), as.double(j), grid, new_patch)
            stream_patch[[length(stream_patch)+1]] = completed_patch
          }
          
        }
      }
      
      if(grid[i,j] == "Wolf"){
        # temporary stand in, can only be scored properly with more than 1 player
        # if (first_wolf == TRUE){
        #   score = score + 8
        #   wolf_score = wolf_score + 8
        #   first_wolf == FALSE
        # }
        
        num_wolves = num_wolves + 1
      }
      
    }
  }
  
  for (i in meadow_patch){
    if (length(i) == 2){
      score = score + 3
      meadow_score = meadow_score + 3
    }else if (length(i) == 3){
      score = score + 6
      meadow_score = meadow_score + 6
    }else if (length(i) == 4){
      score = score + 10
      meadow_score = meadow_score + 10
    }else if (length(i) >= 5){
      score = score + 15
      meadow_score = meadow_score + 15
    }
  }
  
  largest_stream = 0
  
  for (i in stream_patch){
    if (length(i) > largest_stream){
      largest_stream = length(i)
    }
  }
  
  # temporary scoring for largest stream, can only be scored properly with more than 1 player
  # if (largest_stream > 0){
  #   score = score + 5
  #   stream_score = stream_score + 5
  # }
  
  for (d in dragonfly_list){
    current_caridnals <- find_cardinals(d[1], d[2], grid)
    
    largest_score = 0
    
    for (k in current_caridnals){
      if (grid[k[1],k[2]] == "Stream"){
        for (s in stream_patch){
          if((list(k) %in% s) == TRUE){
            current_score = 2 * length(s)
            if (current_score > largest_score){
              largest_score = current_score
            }
          }
        }
      }
    }
    
    score = score + largest_score
    dragonfly_score = dragonfly_score + largest_score
  }
  
  score = score + 2*length(unique(deer_row))
  score = score + 2*length(unique(deer_col))
  
  deer_score = deer_score + 2*length(unique(deer_row))
  deer_score = deer_score + 2*length(unique(deer_col))
  
  diversity_matrix <- matrix(c(bear_score, bee_score, meadow_score, trout_score, eagle_score, rabbit_score,
           dragonfly_score, fox_score, deer_score))
  
  diversity_vector <- c(bear_score, bee_score, meadow_score, trout_score, eagle_score, rabbit_score,
           dragonfly_score, fox_score, deer_score)

  gaps = colSums(diversity_matrix == 0)[1]
   
  # if(gaps >= 6){
  #   score = score - 5
  #   diversity_score = -5
  # }else if (gaps == 4){
  #   score = score + 3
  #   diversity_score = 3
  # }else if (gaps == 3){
  #   score = score + 7
  #   diversity_score = 7
  # }else if (gaps <= 2){
  #   score = score + 12
  #   diversity_score = 12
  # }
  if(individual==TRUE){
    if(largest_stream == 0){
      gaps = gaps + 1
    }
    if(num_wolves == 0){
      gaps = gaps + 1
    }
  
    if(gaps >= 6){
        dv_score = -5
      }else if (gaps == 4){
        dv_score = 3
      }else if (gaps == 3){
        dv_score = 7
      }else if (gaps <= 2){
        dv_score = 12
      }else{
        dv_score = 0
      }
    
    return(c(diversity_vector,largest_stream, num_wolves, dv_score))
  }else{
    return(c(score, largest_stream, num_wolves, gaps))
  }
  
}

Solo play scoring

Code
solo_score <- function(score_vector){
  score = score_vector[1]
  stream_size = score_vector[2]
  num_wolves = score_vector[3]
  num_gaps = score_vector[4]
  
  score = score + stream_size + num_wolves
  
  if(stream_size == 0){
    num_gaps = num_gaps + 1
  }
  if(num_wolves == 0){
    num_gaps = num_gaps + 1
  }
  
  if(num_gaps >= 6){
      score = score - 5
    }else if (num_gaps == 4){
      score = score + 3
    }else if (num_gaps == 3){
      score = score + 7
    }else if (num_gaps <= 2){
      score = score + 12
    }
  
  return(score)
}

Multiplayer Scoring

Code
mp_score <- function(score_list){
  
  # each entry in score_list follows the format: c(score, size_of_largest_stream, num_wolves, diversity_gaps)
  
  if(length(score_list) > 2){
    more_than_2 = TRUE
  }else{
    more_than_2 = FALSE
  }
  
  score <- rep(0, length(score_list))
  stream_size <- rep(0, length(score_list))
  num_wolves <- rep(0, length(score_list))
  num_gaps <- rep(0, length(score_list))
  
  for (i in 1:length(score_list)){
    
    score[i] = score_list[[i]][1]
    
    stream_size[i] = score_list[[i]][2]
    
    num_wolves[i] = score_list[[i]][3]
    
    num_gaps[i] = score_list[[i]][4] + 2
    
  }
  
  print(score)
  
  stream_size = sort(stream_size, decreasing = TRUE)
  num_wolves = sort(num_wolves, decreasing = TRUE)
  
  stream_matrix = matrix(stream_size)
  wolf_matrix = matrix(num_wolves)
  
  largest_stream = stream_size[1]
  second_stream = stream_size[2]
  
  score_largest_s = TRUE
  score_second_s = TRUE
  
  if(colSums(stream_matrix == largest_stream)[1] > 1){
    score_second_s = FALSE
  }
  
  for (i in 1:length(score_list)){
      if (score_list[[i]][2] == largest_stream && largest_stream != 0){
        score[i] = score[i] + 8
        num_gaps[i] = num_gaps[i] - 1
      }
  }
  
  if(score_second_s == TRUE){
    for (i in 1:length(score_list)){
      if (score_list[[i]][2] == second_stream && second_stream != 0){
        score[i] = score[i] + 5
        num_gaps[i] = num_gaps[i] - 1
      }
    }
  }
  
  print(score)
  
  most_wolves = num_wolves[1]
  second_wolves = num_wolves[2]
  if(more_than_2){
    third_wolves = num_wolves[3]
  }
  
  score_most_w = TRUE
  score_second_w = TRUE
  score_third_w = TRUE
  
  if(colSums(wolf_matrix == most_wolves)[1] > 1){
    score_second_w = FALSE
    if(colSums(wolf_matrix == most_wolves)[1] > 2){
      score_third_w = FALSE
    }
  }
  
  for (i in 1:length(score_list)){
      if (score_list[[i]][3] == most_wolves){
        score[i] = score[i] + 12
        num_gaps[i] = num_gaps[i] - 1
      }
  }
  
  if(colSums(wolf_matrix == second_wolves)[1] > 1){
    score_third_w = FALSE
  }
  
  if(score_second_w == TRUE){
    for (i in 1:length(score_list)){
      if (score_list[[i]][3] == second_wolves){
        score[i] = score[i] + 8
        num_gaps[i] = num_gaps[i] - 1
      }
    }
  }
  
  if(score_third_w == TRUE && more_than_2 == TRUE){
    for (i in 1:length(score_list)){
      if (score_list[[i]][3] == third_wolves){
        score[i] = score[i] + 4
        num_gaps[i] = num_gaps[i] - 1
      }
    }
  }
  
  print(score)
  
  for(i in 1:length(score_list)){
    if(num_gaps[i] >= 6){
      score[i] = score[i] - 5
    }else if (num_gaps[i] == 4){
      score[i] = score[i] + 3
    }else if (num_gaps[i] == 3){
      score[i] = score[i] + 7
    }else if (num_gaps[i] <= 2){
      score[i] = score[i] + 12
    }
  }
  
  
  
  return(score)
  
}

Baseline function

Code
baseline_sim <- function(cards, n = 10000){
  all_scores <- c()
  for (i in 1:n){
    sim_grid <- generate_grid(cards)
    all_scores <- c(all_scores, solo_score(score_grid(sim_grid)))
  }
  return(all_scores)
}

Random Walk MCMC

Code
rw_mcmc <- function(grid, iterations = 1000, acceptance_func = "simple", beta = 0, bp = 500, original = NULL, record_board = FALSE){
  start_score <- solo_score(score_grid(grid))
  current_grid <- grid
  continue <- TRUE
  rows <- rep(1:nrow(grid))
  cols <- rep(1:ncol(grid))
  iter = 0
  score_vector <- c()
  highest_score <- start_score
  highest_grid <- grid
  highest_iter <- 0
  
  while(continue){
    current_score <- solo_score(score_grid(current_grid))
    if(bp == 0){
    }else if(iter%%bp == 0){
      if(acceptance_func == "annealing dynamic" || 
         acceptance_func == "delayed"){
        current_grid = highest_grid
        current_score = highest_score
      }
    }
    score_vector = c(score_vector, current_score)
    if(current_score > highest_score){
      highest_score = current_score
      highest_grid = current_grid
      highest_iter = iter
    }
    
    # Choosing swap locations if grid is partially complete
    if(is.null(original) == FALSE){
      blanks <- which(original == "x", TRUE)
      possible <- list()
      
      for (i in 1:nrow(blanks)){
        loc <- c(blanks[[i, 1]], blanks[[i, 2]])
        possible[[length(possible)+1]] = loc
      }
      
      start_loc <- sample(possible, 1)
      start_loc <- start_loc[[1]]
      start_row <- start_loc[1]
      start_col <- start_loc[2]
      
      end_loc <- sample(possible, 1)
      end_loc <- end_loc[[1]]
      end_row <- end_loc[1]
      end_col <- end_loc[2]
      
      while(current_grid[start_row, start_col] == current_grid[end_row, end_col]){
        end_loc <- sample(possible, 1)
        end_loc <- end_loc[[1]]
        end_row <- end_loc[1]
        end_col <- end_loc[2]
      }
    
    # Choosing swap locations without partial grid  
    }else{
      
      start_row <- sample(rows, 1)
      start_col <- sample(cols, 1)
      
      # Random swap anywhere
      end_row <- sample(rows, 1)
      end_col <- sample(cols, 1)
  
      while(current_grid[start_row, start_col] == current_grid[end_row, end_col]){
        end_row <- sample(rows, 1)
        end_col <- sample(cols, 1)
      }
    }
    # Adjacent swap only
    # possible_end <- find_cardinals(start_row, start_col, grid)
    # end <- sample(possible_end, 1)
    # end_row <- end[[1]][1]
    # end_col <- end[[1]][2]
    
    proposed_grid <- current_grid
    
    # if(iter == 347 || iter == 668){
    #   print(proposed_grid)
    # }
    
    start <- current_grid[start_row, start_col]
    end <- current_grid[end_row, end_col]
    
    proposed_grid[start_row, start_col] = end
    proposed_grid[end_row, end_col] = start
    
    proposed_score <- solo_score(score_grid(proposed_grid))
    
    if(acceptance_func == "simple"){
      p = proposed_score
      c = current_score
      if(p <= 0){
        p = p + -1*p + 1
        c = c + -1*p + 1
      }
      if(c <= 0){
        c = c + -1*c + 1
        p = p + -1*c + 1
      }
      
      x <- runif(1,0,1)
      if(x < p/c){
        current_grid <- proposed_grid
      }
    }
    
    if(acceptance_func == "annealing"){
      p = proposed_score
      c = current_score
      
      x <- runif(1,0,1)
      if(x < exp(beta*p)/exp(beta*c)){
        current_grid <- proposed_grid
      }
    }
    
    if(acceptance_func == "annealing dynamic"){
      p = proposed_score
      c = current_score
      
      factor = floor(iter/bp) + 1
      b = beta*(factor)
      
      x <- runif(1,0,1)
      if(x < exp(b*p)/exp(b*c)){
        current_grid <- proposed_grid
      }
    }
    
    if(acceptance_func == "delayed"){
      p = proposed_score
      c = current_score
      if(p <= 0){
        p = p + -1*p + 1
        c = c + -1*p + 1
      }
      if(c <= 0){
        c = c + -1*c + 1
        p = p + -1*c + 1
      }
      
      x <- runif(1,0,1)
      if(x < p/c && p/c < 1){
        p2 = proposed_score
        c2 = current_score
        
        factor = floor(iter/500) + 1
        b = beta*(factor)
        
        x <- runif(1,0,1)
        if(x < (exp(b*p2)*c)/(exp(b*c2)*p)){
          current_grid <- proposed_grid
        }
      }else if(p/c >= 1){
        current_grid <- proposed_grid
      }
    }
    
    iter = iter + 1
      if(iter >= iterations){
        continue = FALSE
      }
  }
  
  final_score <- solo_score(score_grid(current_grid))
  if(final_score > highest_score){
      highest_score = final_score
      highest_grid = grid
    }
  
  if(record_board == TRUE){
    return(c(t(highest_grid), highest_score))
  }else{
    return(c(highest_score, final_score, start_score, highest_iter, data.frame(score_vector)))
  }
}
Code
multi_mcmc <- function(iterations, n, acceptance_func = "simple", beta = 0, bp = 500, grid = NULL, boardlist = NULL, record_board = FALSE, cards = NULL, card_name = NULL){
  first = TRUE
  start_scores <- c()
  highest_scores <- c()
  highest_iter <- c()
  if(is.null(cards) == TRUE){
    cards <- c(rep("Bear", 12), 
           rep("Bee", 8), 
           rep("Meadow", 20),
           rep("Trout", 10),
           rep("Eagle", 8),
           rep("Rabbit", 8),
           rep("Dragonfly", 8),
           rep("Fox", 12),
           rep("Deer", 12),
           rep("Stream", 20),
           rep("Wolf", 12)
           )
    card_name = "default"
  }
  
  
  # Creating proper pool of cards if grid is partially complete
  if(is.null(grid) == FALSE){
    df <- as.data.frame(table(grid))
    animals <- levels(df$grid)
    for(i in 1:nrow(grid)){
      animal <- animals[i]
      num = length(cards[cards == animal])
      cards = cards[!cards == animal]
      cards <- c(cards, rep(animal, num - df[i,2]))
    }
  }
  
  for(i in 1:n){
    
    if(is.null(grid) == FALSE){
      
      if(record_board == TRUE){
        sim_grid <- generate_grid(cards, grid)
        run <- rw_mcmc(sim_grid, iterations, acceptance_func, beta, bp, grid, record_board = TRUE)
      }else{
        sim_grid <- generate_grid(cards, grid)
        run <- rw_mcmc(sim_grid, iterations, acceptance_func, beta, bp, grid)
      }
      
    }else if(is.null(boardlist) == FALSE){
      
      if(record_board == TRUE){
        run <- rw_mcmc(boardlist[[i]], iterations, acceptance_func, beta, bp, record_board = TRUE)
      }else{
        run <- rw_mcmc(boardlist[[i]], iterations, acceptance_func, beta, bp)
      }
      
    }else{
      if(record_board == TRUE){
        sim_grid <- generate_grid(cards)
        run <- rw_mcmc(sim_grid, iterations, acceptance_func, beta, bp, record_board = TRUE)
      }else{
        sim_grid <- generate_grid(cards)
        run <- rw_mcmc(sim_grid, iterations, acceptance_func, beta, bp)
      }
    }
    
    if(record_board == TRUE){
      if(first == TRUE){
        first = FALSE
        
        df <- data.frame(
          row1col1 = run[[1]],
          row1col2 = run[[2]],
          row1col3 = run[[3]],
          row1col4 = run[[4]],
          row1col5 = run[[5]],
          row2col1 = run[[6]],
          row2col2 = run[[7]],
          row2col3 = run[[8]],
          row2col4 = run[[9]],
          row2col5 = run[[10]],
          row3col1 = run[[11]],
          row3col2 = run[[12]],
          row3col3 = run[[13]],
          row3col4 = run[[14]],
          row3col5 = run[[15]],
          row4col1 = run[[16]],
          row4col2 = run[[17]],
          row4col3 = run[[18]],
          row4col4 = run[[19]],
          row4col5 = run[[20]],
          score = run[[21]],
          pool = card_name
        )
        
      }else{
        row <- data.frame(
          row1col1 = run[[1]],
          row1col2 = run[[2]],
          row1col3 = run[[3]],
          row1col4 = run[[4]],
          row1col5 = run[[5]],
          row2col1 = run[[6]],
          row2col2 = run[[7]],
          row2col3 = run[[8]],
          row2col4 = run[[9]],
          row2col5 = run[[10]],
          row3col1 = run[[11]],
          row3col2 = run[[12]],
          row3col3 = run[[13]],
          row3col4 = run[[14]],
          row3col5 = run[[15]],
          row4col1 = run[[16]],
          row4col2 = run[[17]],
          row4col3 = run[[18]],
          row4col4 = run[[19]],
          row4col5 = run[[20]],
          score = run[[21]],
          pool = card_name
        )
        df <- rbind(df, row) 
      }
    }else{
      start_scores <- c(start_scores, run[[3]])
      highest_scores <- c(highest_scores, run[[1]])
      highest_iter <- c(highest_iter, run[[4]])
    }
    
  }
  
  if(record_board == TRUE){
    return(df)
  }else{
    return(list(start_scores, highest_scores, highest_iter))
  }
  
}

Data Generation

Code
set.seed(4)
x1 <- multi_mcmc(2000, 100, "simple")
Code
mean(x1[[1]])
sd(x1[[1]])
var(x1[[1]])
max(x1[[1]])
min(x1[[1]])
summary(x1[[1]])
Code
mean(x1[[2]])
sd(x1[[2]])
var(x1[[2]])
max(x1[[2]])
min(x1[[2]])
summary(x1[[2]])
summary(x1[[3]])
Code
set.seed(4)
x2 <- multi_mcmc(2000, 100, "annealing", 0.8, 250)
Code
mean(x2[[1]])
sd(x2[[1]])
var(x2[[1]])
max(x2[[1]])
min(x2[[1]])
summary(x2[[1]])
Code
mean(x2[[2]])
sd(x2[[2]])
var(x2[[2]])
max(x2[[2]])
min(x2[[2]])
summary(x2[[2]])
summary(x2[[3]])
Code
set.seed(4)
x3 <- multi_mcmc(2000, 100, "annealing dynamic", 0.08, 250)
Code
mean(x3[[1]])
sd(x3[[1]])
var(x3[[1]])
max(x3[[1]])
min(x3[[1]])
summary(x3[[1]])
Code
mean(x3[[2]])
sd(x3[[2]])
var(x3[[2]])
max(x3[[2]])
min(x3[[2]])
summary(x3[[2]])
summary(x3[[3]])
Code
set.seed(4)
x4 <- multi_mcmc(2000, 100, "annealing dynamic", 0.2, 250)
Code
mean(x4[[1]])
sd(x4[[1]])
var(x4[[1]])
max(x4[[1]])
min(x4[[1]])
summary(x4[[1]])
Code
mean(x4[[2]])
sd(x4[[2]])
var(x4[[2]])
max(x4[[2]])
min(x4[[2]])
summary(x4[[2]])
Code
set.seed(49)
x5 <- multi_mcmc(2000, 100, "annealing", 0.8, 250)
Code
mean(x5[[1]])
sd(x5[[1]])
var(x5[[1]])
max(x5[[1]])
min(x5[[1]])
summary(x5[[1]])
Code
mean(x5[[2]])
sd(x5[[2]])
var(x5[[2]])
max(x5[[2]])
min(x5[[2]])
summary(x5[[2]])
summary(x5[[3]])
Code
set.seed(49)
x6 <- multi_mcmc(2000, 100, "delayed", 0.2, 250)
Code
mean(x6[[1]])
sd(x6[[1]])
var(x6[[1]])
max(x6[[1]])
min(x6[[1]])
summary(x6[[1]])
Code
mean(x6[[2]])
sd(x6[[2]])
var(x6[[2]])
max(x6[[2]])
min(x6[[2]])
summary(x6[[2]])
summary(x6[[3]])
Code
set.seed(49)
x7 <- multi_mcmc(1000, 1000, "annealing", 0.8, 250)
Code
mean(x7[[1]])
sd(x7[[1]])
var(x7[[1]])
max(x7[[1]])
min(x7[[1]])
summary(x7[[1]])
Code
mean(x7[[2]])
sd(x7[[2]])
var(x7[[2]])
max(x7[[2]])
min(x7[[2]])
summary(x7[[2]])
summary(x7[[3]])
Code
test_grid1 <- matrix(c("x", "Bee", "Meadow", "x", "x", "x", "Meadow", "Meadow", "x", "x",
                      "Deer", "Bee", "Meadow", "x", "x", "x", "Bear", "Bee", "Deer",
                      "x"),4,5,byrow=TRUE)
test_grid1
     [,1]   [,2]     [,3]     [,4]   [,5]
[1,] "x"    "Bee"    "Meadow" "x"    "x" 
[2,] "x"    "Meadow" "Meadow" "x"    "x" 
[3,] "Deer" "Bee"    "Meadow" "x"    "x" 
[4,] "x"    "Bear"   "Bee"    "Deer" "x" 
Code
set.seed(4)
partial1 <- multi_mcmc(1000, 100, "annealing", 0.8, 200, test_grid1)
Code
mean(partial1[[1]])
sd(partial1[[1]])
var(partial1[[1]])
max(partial1[[1]])
min(partial1[[1]])
summary(partial1[[1]])
Code
mean(partial1[[2]])
sd(partial1[[2]])
var(partial1[[2]])
max(partial1[[2]])
min(partial1[[2]])
summary(partial1[[2]])
summary(partial1[[3]])

Tuning

Code
tune <- function(iter, beta, bp, type, boardlist = NULL){
  beta_df <- c()
  bp_df <- c()
  iter_df <- c()
  start_score <- c()
  highest_score <- c()
  iter_at_highest <- c()
  highest_score_sd <- c()
  highest_iter_sd <- c()
  max_highest <- c()
  percentile_90 <- c()
  score_75_plus <- c()
  for(i in beta){
    for (j in bp){
      for(k in iter){
        if(is.null(boardlist) == FALSE){
          sim <- multi_mcmc(k, 100, type, i, j, boardlist = boardlist)
        }else{
          sim <- multi_mcmc(k, 100, type, i, j)
        }
        beta_df <- c(beta_df, i)
        bp_df <- c(bp_df, j)
        iter_df <- c(iter_df, k)
        start_score <- c(start_score, mean(sim[[1]]))
        highest_score <- c(highest_score, mean(sim[[2]]))
        iter_at_highest <- c(iter_at_highest, mean(sim[[3]]))
        highest_iter_sd <- c(highest_iter_sd, sd(sim[[3]]))
        highest_score_sd <- c(highest_score_sd, sd(sim[[2]]))
        max_highest <- c(max_highest, max(sim[[2]]))
        percentile_90 <- c(percentile_90, quantile(sim[[2]],probs=0.9))
        s <- sim[[2]]
        score_75_plus <- c(score_75_plus, length(s[s>=75]))
      }
      
    }
  }
  
  df <- data.frame(
    iterations = iter_df,
    beta = beta_df,
    break_point = bp_df,
    mean_start_score = start_score,
    mean_highest_score = highest_score,
    sd_highest_score = highest_score_sd,
    max_score = max_highest,
    score_90th_percent = percentile_90,
    score_75_plus = score_75_plus,
    average_iterations = iter_at_highest,
    sd_iterations = highest_iter_sd,
    type = type
  )
  
  return(df)
}
Code
beta <- c(0.3,0.8,0.9)
bp <- c(125, 250)
iter <- c(500, 750)

set.seed(4)
tune1 <- tune(iter, beta, bp, "delayed")
Code
tune1
Code
generate_100 <- function(pool){
  board_list <- list()
  for(i in 1:100){
    board <- generate_grid(pool)
    board_list[[length(board_list)+1]] = board
  }
  return(board_list)
}
Code
# set.seed(88)
set.seed(89)
board100 <- generate_100(cards)
Code
tune_exact1 <- tune(1000, 0.99, 250, "annealing dynamic", boardlist = board100)
tune_exact2 <- tune(1000, 0.9, 200, "annealing dynamic", boardlist = board100)
tune_exact3 <- tune(2000, 0.7, 500, "delayed", boardlist = board100)
tune_exact4 <- tune(2000, 0.8, 500, "annealing dynamic", boardlist = board100)
tune_exact5 <- tune(1500, 0.9, 500, "annealing dynamic", boardlist = board100)
tune_exact6 <- tune(750, 0.99, 250, "annealing dynamic", boardlist = board100)
tune_exact7 <- tune(750, 0.3, 250, "delayed", boardlist = board100)
tune_exact8 <- tune(500, 0.99, 250, "annealing dynamic", boardlist = board100)
tune_exact9 <- tune(2000, 0.9, 250, "annealing dynamic", boardlist = board100)
tune_exact10 <- tune(750, 0.9, 125, "annealing dynamic", boardlist = board100)
tune_exact11 <- tune(1500, 0.9, 250, "annealing dynamic", boardlist = board100)
Code
final_params <- rbind(tune_exact1,
                      tune_exact2,
                      tune_exact3,
                      tune_exact4,
                      tune_exact5,
                      tune_exact6,
                      tune_exact7,
                      tune_exact8,
                      tune_exact9,
                      tune_exact10,
                      tune_exact11)
Error: object 'tune_exact1' not found
Code
write.csv(final_params,here::here("final-parameters-seed2.csv"), row.names = FALSE)
Error in eval(expr, p): object 'final_params' not found
Code
# write.csv(tune1,here::here("new-parameters.csv"),row.names = FALSE)
Code
tuned_params1 <- read.csv(here::here("final-parameters.csv"))
Code
new_params <- rbind(tuned_params1, tune1)
write.csv(new_params,here::here("new-parameters.csv"), row.names = FALSE)
Code
set.seed(45)
sim_grid1 <- generate_grid(cards)
x <- rw_mcmc(sim_grid1, 2000, "annealing dynamic", beta = 0.8, 250)
Code
xz <- data.frame(iter = rep(1:length(x[[4]])), scores = x[[4]])
ggplot(aes(x = iter, y = scores), data = xz) +
  geom_line()

Database

Code
cards <- c(rep("Bear", 12), 
           rep("Bee", 8), 
           rep("Meadow", 20),
           rep("Trout", 10),
           rep("Eagle", 8),
           rep("Rabbit", 8),
           rep("Dragonfly", 8),
           rep("Fox", 12),
           rep("Deer", 12),
           rep("Stream", 20),
           rep("Wolf", 12)
           )

dfly_stream <- c(rep("Bear", 6), 
           rep("Bee", 4), 
           rep("Meadow", 10),
           rep("Trout", 5),
           rep("Eagle", 4),
           rep("Rabbit", 4),
           rep("Dragonfly", 8),
           rep("Fox", 6),
           rep("Deer", 6),
           rep("Stream", 20),
           rep("Wolf", 6)
           )

bee_meadow <- c(rep("Bear", 6), 
           rep("Bee", 8), 
           rep("Meadow", 20),
           rep("Trout", 5),
           rep("Eagle", 4),
           rep("Rabbit", 4),
           rep("Dragonfly", 4),
           rep("Fox", 6),
           rep("Deer", 6),
           rep("Stream", 10),
           rep("Wolf", 6)
           )

low_eag_rab <- c(rep("Bear", 12), 
           rep("Bee", 8), 
           rep("Meadow", 20),
           rep("Trout", 10),
           rep("Eagle", 2),
           rep("Rabbit", 2),
           rep("Dragonfly", 8),
           rep("Fox", 12),
           rep("Deer", 12),
           rep("Stream", 20),
           rep("Wolf", 12)
           )
Code
startTime <- Sys.time()

db_gen <- multi_mcmc(1000, 2000, "annealing dynamic", 0.9, 200, record_board = TRUE, cards = low_eag_rab, card_name = "low_eagle_rabbit")
endTime <- Sys.time()
print(endTime - startTime)

database_old <- read.csv(here::here("database.csv"))
database_new <- rbind(database_old, db_gen)
database_new <- database_new %>% distinct()
write.csv(database_new, here::here("database.csv"), row.names = FALSE)
Code
head(db_gen)
Code
database_new <- database_new %>% mutate(score = as.numeric(score))
Error: object 'database_new' not found
Code
# write.csv(db_gen, here::here("database.csv"), row.names = FALSE)
Code
database_old <- read.csv(here::here("database.csv"))
Code
database_new <- rbind(database_old, db_gen)
write.csv(database_new, here::here("database.csv"), row.names = FALSE)
Code
database_new <- database_new %>% distinct()
Code
database_new <- read.csv(here::here("database.csv"))
Code
database_new %>%
  mutate(score = as.numeric(score)) %>%
  filter(pool == "default") %>%
  ggplot(aes(x = score)) +
  geom_histogram(binwidth = 3, fill = "steelblue", color = "black")

Code
database_new %>%
  mutate(score = as.numeric(score)) %>%
  filter(pool == "dragonfly_stream") %>%
  ggplot(aes(x = score)) +
  geom_histogram(binwidth = 3, fill = "steelblue", color = "black")

Code
database_new %>%
  mutate(score = as.numeric(score)) %>%
  filter(pool == "bee_meadow") %>%
  ggplot(aes(x = score)) +
  geom_histogram(binwidth = 3, fill = "steelblue", color = "black")

Code
database_new %>%
  mutate(score = as.numeric(score)) %>%
  filter(pool == "low_eagle_rabbit") %>%
  ggplot(aes(x = score)) +
  geom_histogram(binwidth = 3, fill = "steelblue", color = "black")

Clustering

Code
# database_new <- read.csv(here::here("database.csv"))
# 
# database_new_ID <- database_new %>%
#   mutate(ID = rep(1:50000)) %>%
#   select(ID, 1:22)
# 
# write.csv(database_new_ID, here::here("database.csv"), row.names = FALSE)
Code
database_new <- read.csv(here::here("database.csv"))
Code
grids <- database_new %>% select(-c(ID, pool))

dmy <- dummyVars(" ~ .", data = grids)

grids <- data.frame(predict(dmy, newdata = grids))

grids_matrix <- as.matrix(grids)
Code
set.seed(4)
km_spec1 <- k_means(num_clusters = 4)

grids_recipe <- recipe(~., data = grids_matrix)

km_wflow1 <- workflow() |>
  add_recipe(grids_recipe) |>
  add_model(km_spec1)

km_fitted1 <- km_wflow1 |> fit(grids_matrix)

km_fitted1 |> extract_centroids()
# A tibble: 4 × 222
  .cluster row1col1Bear row1col1Bee row1col1Deer row1col1Dragonfly row1col1Eagle
  <fct>           <dbl>       <dbl>        <dbl>             <dbl>         <dbl>
1 Cluster…       0.0700      0.0306        0.127            0.0474        0.0341
2 Cluster…       0.0520      0.0443        0.137            0.0810        0.0381
3 Cluster…       0.0958      0.0210        0.114            0.0341        0.0352
4 Cluster…       0.142       0.0121        0.108            0.0265        0.0535
# ℹ 216 more variables: row1col1Fox <dbl>, row1col1Meadow <dbl>,
#   row1col1Rabbit <dbl>, row1col1Stream <dbl>, row1col1Trout <dbl>,
#   row1col1Wolf <dbl>, row1col2Bear <dbl>, row1col2Bee <dbl>,
#   row1col2Deer <dbl>, row1col2Dragonfly <dbl>, row1col2Eagle <dbl>,
#   row1col2Fox <dbl>, row1col2Meadow <dbl>, row1col2Rabbit <dbl>,
#   row1col2Stream <dbl>, row1col2Trout <dbl>, row1col2Wolf <dbl>,
#   row1col3Bear <dbl>, row1col3Bee <dbl>, row1col3Deer <dbl>, …
Code
grids_km1 <- kmeans(grids_matrix, centers = 4)

grids_km1$totss
[1] 5052189
Code
grids_km1$withinss
[1] 371820.1 459162.4 448740.3 210847.5
Code
grids_km1$betweenss
[1] 3561619
Code
x <- km_fitted1 |> extract_centroids()
x
# A tibble: 4 × 222
  .cluster row1col1Bear row1col1Bee row1col1Deer row1col1Dragonfly row1col1Eagle
  <fct>           <dbl>       <dbl>        <dbl>             <dbl>         <dbl>
1 Cluster…       0.0700      0.0306        0.127            0.0474        0.0341
2 Cluster…       0.0520      0.0443        0.137            0.0810        0.0381
3 Cluster…       0.0958      0.0210        0.114            0.0341        0.0352
4 Cluster…       0.142       0.0121        0.108            0.0265        0.0535
# ℹ 216 more variables: row1col1Fox <dbl>, row1col1Meadow <dbl>,
#   row1col1Rabbit <dbl>, row1col1Stream <dbl>, row1col1Trout <dbl>,
#   row1col1Wolf <dbl>, row1col2Bear <dbl>, row1col2Bee <dbl>,
#   row1col2Deer <dbl>, row1col2Dragonfly <dbl>, row1col2Eagle <dbl>,
#   row1col2Fox <dbl>, row1col2Meadow <dbl>, row1col2Rabbit <dbl>,
#   row1col2Stream <dbl>, row1col2Trout <dbl>, row1col2Wolf <dbl>,
#   row1col3Bear <dbl>, row1col3Bee <dbl>, row1col3Deer <dbl>, …
Code
pc <- prcomp(grids_matrix)
Code
cumul_vars <- cumsum(pc$sdev^2)/sum(pc$sdev^2)
cumul_vars
  [1] 0.8262699 0.8344343 0.8401506 0.8447357 0.8481400 0.8512645 0.8543596
  [8] 0.8571382 0.8597179 0.8621789 0.8646002 0.8669165 0.8691075 0.8712399
 [15] 0.8733117 0.8753313 0.8773469 0.8793509 0.8812528 0.8830664 0.8848660
 [22] 0.8866272 0.8883736 0.8900891 0.8917286 0.8933377 0.8949434 0.8965283
 [29] 0.8979800 0.8994247 0.9008390 0.9022289 0.9035779 0.9049160 0.9062025
 [36] 0.9074680 0.9087246 0.9099753 0.9111725 0.9123417 0.9135056 0.9146304
 [43] 0.9157528 0.9168553 0.9179437 0.9190205 0.9200832 0.9211415 0.9221797
 [50] 0.9232034 0.9242053 0.9252057 0.9261971 0.9271803 0.9281464 0.9291076
 [57] 0.9300644 0.9310161 0.9319635 0.9328935 0.9338167 0.9347370 0.9356410
 [64] 0.9365347 0.9374233 0.9382853 0.9391459 0.9399874 0.9408102 0.9416306
 [71] 0.9424384 0.9432442 0.9440263 0.9447970 0.9455672 0.9463304 0.9470871
 [78] 0.9478354 0.9485749 0.9493072 0.9500344 0.9507528 0.9514698 0.9521774
 [85] 0.9528801 0.9535817 0.9542721 0.9549544 0.9556341 0.9563076 0.9569774
 [92] 0.9576409 0.9582959 0.9589498 0.9596002 0.9602391 0.9608735 0.9615018
 [99] 0.9621293 0.9627451 0.9633553 0.9639575 0.9645549 0.9651397 0.9657195
[106] 0.9662952 0.9668654 0.9674312 0.9679949 0.9685427 0.9690825 0.9696196
[113] 0.9701484 0.9706720 0.9711924 0.9717052 0.9722126 0.9727169 0.9732185
[120] 0.9737144 0.9742058 0.9746965 0.9751795 0.9756567 0.9761317 0.9766042
[127] 0.9770729 0.9775393 0.9780009 0.9784592 0.9789082 0.9793500 0.9797902
[134] 0.9802228 0.9806544 0.9810817 0.9815054 0.9819246 0.9823391 0.9827525
[141] 0.9831575 0.9835572 0.9839513 0.9843437 0.9847296 0.9851109 0.9854880
[148] 0.9858575 0.9862220 0.9865786 0.9869344 0.9872848 0.9876326 0.9879766
[155] 0.9883156 0.9886516 0.9889831 0.9893118 0.9896365 0.9899509 0.9902635
[162] 0.9905738 0.9908800 0.9911832 0.9914812 0.9917689 0.9920532 0.9923336
[169] 0.9926123 0.9928876 0.9931610 0.9934310 0.9936987 0.9939637 0.9942273
[176] 0.9944882 0.9947476 0.9950056 0.9952613 0.9955154 0.9957629 0.9960072
[183] 0.9962475 0.9964865 0.9967241 0.9969598 0.9971932 0.9974256 0.9976518
[190] 0.9978744 0.9980914 0.9983073 0.9985192 0.9987288 0.9989364 0.9991387
[197] 0.9993293 0.9995011 0.9996693 0.9998363 1.0000000 1.0000000 1.0000000
[204] 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000
[211] 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000
[218] 1.0000000 1.0000000 1.0000000 1.0000000
Code
grids_reduced <- pc$x[, 1:8]

grids_pca_km <- kmeans(grids_reduced, 4)

grids_pca_km$totss
[1] 4330425
Code
grids_pca_km$withinss
[1] 269715.5 127910.6 182482.4 189124.9
Code
grids_pca_km$betweenss
[1] 3561191
Code
pc$rotation[,1:2]
                            PC1           PC2
row1col1Bear       2.649657e-03  1.104530e-02
row1col1Bee       -1.158698e-03  3.047751e-02
row1col1Deer      -9.608389e-04  2.295259e-04
row1col1Dragonfly -2.468062e-03 -4.682377e-02
row1col1Eagle      1.824416e-04 -1.391958e-02
row1col1Fox        6.559876e-04 -8.397230e-03
row1col1Meadow    -1.617880e-03  1.233143e-01
row1col1Rabbit    -5.718403e-04 -1.134916e-02
row1col1Stream     2.717291e-05 -8.011985e-02
row1col1Trout     -6.903208e-04 -1.501071e-02
row1col1Wolf       3.952382e-03  1.055361e-02
row1col2Bear       1.916023e-03  1.010461e-02
row1col2Bee       -1.351101e-03  4.262760e-02
row1col2Deer       3.924640e-04  5.558147e-03
row1col2Dragonfly -2.072762e-03 -3.816086e-02
row1col2Eagle     -3.175132e-04 -1.769564e-02
row1col2Fox        1.362087e-03  1.408401e-04
row1col2Meadow    -8.005904e-04  1.321158e-01
row1col2Rabbit     1.037345e-05  4.010191e-03
row1col2Stream    -6.673661e-04 -1.156329e-01
row1col2Trout     -1.497311e-03 -3.164247e-02
row1col2Wolf       3.025695e-03  8.574621e-03
row1col3Bear       1.679487e-03  8.169932e-04
row1col3Bee       -1.244730e-03  1.390651e-03
row1col3Deer       7.720037e-04 -1.018956e-03
row1col3Dragonfly -2.557329e-03 -2.445883e-03
row1col3Eagle     -4.733135e-04 -1.393656e-03
row1col3Fox        1.345418e-03 -1.489911e-03
row1col3Meadow     2.545113e-04  4.632341e-03
row1col3Rabbit     6.186758e-05  1.915093e-03
row1col3Stream    -4.852718e-04 -4.707403e-03
row1col3Trout     -1.951277e-03  6.158401e-04
row1col3Wolf       2.598634e-03  1.684892e-03
row1col4Bear       2.293700e-03 -4.412166e-03
row1col4Bee       -1.396043e-03 -4.126496e-02
row1col4Deer       2.400714e-04 -6.394343e-03
row1col4Dragonfly -2.260096e-03  3.855945e-02
row1col4Eagle     -5.383542e-04  1.862509e-02
row1col4Fox        1.271799e-03 -4.512922e-03
row1col4Meadow    -6.018285e-04 -1.238800e-01
row1col4Rabbit     1.756345e-05 -4.800012e-03
row1col4Stream    -3.031215e-04  1.047287e-01
row1col4Trout     -1.519027e-03  3.031563e-02
row1col4Wolf       2.795336e-03 -6.964540e-03
row1col5Bear       2.703025e-03 -8.055195e-03
row1col5Bee       -1.288867e-03 -2.940842e-02
row1col5Deer      -1.328770e-03 -4.375605e-03
row1col5Dragonfly -2.169101e-03  4.491265e-02
row1col5Eagle      3.252771e-04  1.412125e-02
row1col5Fox        8.465991e-04  4.170017e-04
row1col5Meadow    -1.485325e-03 -1.164149e-01
row1col5Rabbit    -5.874975e-04  1.146956e-02
row1col5Stream    -1.465722e-05  7.826931e-02
row1col5Trout     -6.973340e-04  1.448302e-02
row1col5Wolf       3.696651e-03 -5.418622e-03
row2col1Bear       2.127058e-03  5.629902e-03
row2col1Bee       -1.432086e-03  6.459046e-02
row2col1Deer       4.460080e-04  3.478263e-03
row2col1Dragonfly -2.224559e-03 -5.989847e-02
row2col1Eagle     -3.594363e-04 -1.895993e-02
row2col1Fox        1.180679e-03 -4.479414e-03
row2col1Meadow    -1.922216e-03  2.309633e-01
row2col1Rabbit     1.777383e-04 -2.139114e-03
row2col1Stream     9.351417e-05 -1.713423e-01
row2col1Trout     -1.351358e-03 -5.142237e-02
row2col1Wolf       3.264658e-03  3.579621e-03
row2col2Bear       1.708217e-03  1.630698e-02
row2col2Bee       -1.913927e-03  1.047215e-01
row2col2Deer       9.395536e-04  7.002407e-03
row2col2Dragonfly -1.412518e-03 -3.290655e-02
row2col2Eagle     -3.618640e-04 -5.414834e-03
row2col2Fox        9.453663e-04  3.378233e-03
row2col2Meadow     6.454030e-04  1.975911e-01
row2col2Rabbit     6.528876e-04  2.153125e-03
row2col2Stream    -7.607296e-04 -2.054084e-01
row2col2Trout     -2.107437e-03 -9.332735e-02
row2col2Wolf       1.665048e-03  5.903820e-03
row2col3Bear       1.120035e-03  9.049514e-04
row2col3Bee       -7.339252e-04  1.001967e-03
row2col3Deer       8.005560e-04  1.106805e-03
row2col3Dragonfly -1.715858e-03 -5.495156e-04
row2col3Eagle     -3.648529e-04 -1.274841e-03
row2col3Fox        9.186916e-04  1.131443e-04
row2col3Meadow     1.179412e-03  1.194301e-03
row2col3Rabbit     6.406921e-04  7.522064e-04
row2col3Stream     2.614248e-06 -2.750386e-03
row2col3Trout     -3.304149e-03  7.044022e-05
row2col3Wolf       1.456783e-03 -5.690719e-04
row2col4Bear       1.587036e-03 -1.501841e-02
row2col4Bee       -1.652151e-03 -1.043086e-01
row2col4Deer       1.093581e-03 -5.677750e-03
row2col4Dragonfly -1.459588e-03  3.433253e-02
row2col4Eagle     -4.694642e-04  5.616182e-03
row2col4Fox        8.778038e-04 -5.249001e-03
row2col4Meadow     3.151903e-04 -1.926665e-01
row2col4Rabbit     5.667673e-04 -3.256551e-03
row2col4Stream    -3.017283e-04  1.999165e-01
row2col4Trout     -2.217066e-03  9.236426e-02
row2col4Wolf       1.659619e-03 -6.052632e-03
row2col5Bear       2.277521e-03 -8.221136e-03
row2col5Bee       -1.541476e-03 -6.234644e-02
row2col5Deer       6.546044e-04 -5.879182e-03
row2col5Dragonfly -2.191282e-03  5.814561e-02
row2col5Eagle     -5.182556e-04  2.192808e-02
row2col5Fox        1.272641e-03  1.833105e-03
row2col5Meadow    -1.858605e-03 -2.237812e-01
row2col5Rabbit     1.254353e-04  2.580762e-03
row2col5Stream    -1.847185e-04  1.730932e-01
row2col5Trout     -1.265220e-03  4.835563e-02
row2col5Wolf       3.229356e-03 -5.708395e-03
row3col1Bear       2.306924e-03  7.994807e-03
row3col1Bee       -1.531752e-03  6.323926e-02
row3col1Deer       6.647579e-04  4.154626e-03
row3col1Dragonfly -2.151379e-03 -5.965675e-02
row3col1Eagle     -3.621139e-04 -1.979888e-02
row3col1Fox        1.461167e-03 -3.351028e-03
row3col1Meadow    -2.211332e-03  2.263729e-01
row3col1Rabbit     1.844129e-04 -3.549851e-03
row3col1Stream     9.165953e-05 -1.693116e-01
row3col1Trout     -1.544134e-03 -5.122980e-02
row3col1Wolf       3.091790e-03  5.136352e-03
row3col2Bear       1.494113e-03  1.384646e-02
row3col2Bee       -1.922189e-03  1.024542e-01
row3col2Deer       1.021120e-03  5.366398e-03
row3col2Dragonfly -1.335315e-03 -3.549629e-02
row3col2Eagle     -2.601684e-04 -3.636211e-03
row3col2Fox        1.022202e-03  3.384010e-03
row3col2Meadow     2.588722e-04  1.942868e-01
row3col2Rabbit     6.551038e-04  2.739532e-03
row3col2Stream    -8.501485e-04 -2.012267e-01
row3col2Trout     -1.840446e-03 -8.781703e-02
row3col2Wolf       1.756857e-03  6.098867e-03
row3col3Bear       1.108852e-03  4.812706e-03
row3col3Bee       -7.967926e-04 -3.810522e-03
row3col3Deer       1.090767e-03  6.762310e-04
row3col3Dragonfly -2.016226e-03  1.083598e-03
row3col3Eagle     -3.650644e-04  9.184114e-04
row3col3Fox        8.572269e-04  7.106153e-05
row3col3Meadow     1.166387e-03 -5.475146e-03
row3col3Rabbit     6.474442e-04  2.930367e-04
row3col3Stream    -1.779278e-04  4.916941e-03
row3col3Trout     -3.143275e-03 -3.602121e-03
row3col3Wolf       1.628609e-03  1.158037e-04
row3col4Bear       1.791634e-03 -1.923391e-02
row3col4Bee       -1.713301e-03 -1.000108e-01
row3col4Deer       1.050551e-03 -6.882085e-03
row3col4Dragonfly -1.401983e-03  3.529881e-02
row3col4Eagle     -3.710658e-04  3.067195e-03
row3col4Fox        7.634127e-04 -3.402387e-03
row3col4Meadow     2.989192e-04 -2.010339e-01
row3col4Rabbit     6.703199e-04 -3.098337e-03
row3col4Stream    -6.369771e-04  2.036320e-01
row3col4Trout     -2.030981e-03  9.703521e-02
row3col4Wolf       1.579472e-03 -5.371776e-03
row3col5Bear       2.309500e-03 -7.464893e-03
row3col5Bee       -1.536044e-03 -6.461880e-02
row3col5Deer       8.185540e-04 -5.640715e-03
row3col5Dragonfly -2.008123e-03  5.880310e-02
row3col5Eagle     -3.064221e-04  1.935974e-02
row3col5Fox        1.161713e-03  4.000830e-03
row3col5Meadow    -2.022591e-03 -2.260417e-01
row3col5Rabbit     1.086869e-04  2.792596e-03
row3col5Stream     1.751042e-04  1.762938e-01
row3col5Trout     -1.621772e-03  4.963707e-02
row3col5Wolf       2.921395e-03 -7.121022e-03
row4col1Bear       2.633496e-03  9.914914e-03
row4col1Bee       -1.185664e-03  2.831810e-02
row4col1Deer      -1.123767e-03  1.053746e-02
row4col1Dragonfly -2.384150e-03 -4.721670e-02
row4col1Eagle      7.613280e-05 -1.316498e-02
row4col1Fox        9.946044e-04 -7.134276e-03
row4col1Meadow    -1.571362e-03  1.135271e-01
row4col1Rabbit    -5.519641e-04 -8.784153e-03
row4col1Stream     2.539753e-04 -7.891155e-02
row4col1Trout     -6.801618e-04 -1.481484e-02
row4col1Wolf       3.538860e-03  7.728876e-03
row4col2Bear       1.783397e-03  1.000896e-02
row4col2Bee       -1.124139e-03  3.823066e-02
row4col2Deer       3.061706e-04  9.272132e-03
row4col2Dragonfly -2.381098e-03 -3.676371e-02
row4col2Eagle     -1.474549e-04 -1.947806e-02
row4col2Fox        1.152432e-03  4.065746e-03
row4col2Meadow    -8.480964e-04  1.211754e-01
row4col2Rabbit     1.170003e-04  4.737714e-03
row4col2Stream     5.194001e-05 -1.107003e-01
row4col2Trout     -1.675275e-03 -3.201130e-02
row4col2Wolf       2.765124e-03  1.146282e-02
row4col3Bear       1.902913e-03 -1.938076e-03
row4col3Bee       -1.306573e-03 -2.242984e-03
row4col3Deer       8.238128e-04  3.766445e-03
row4col3Dragonfly -2.607222e-03  1.098951e-03
row4col3Eagle     -7.209925e-04 -3.601229e-05
row4col3Fox        1.113900e-03 -1.795936e-03
row4col3Meadow    -9.656987e-05 -4.379197e-03
row4col3Rabbit    -6.056850e-06 -1.727882e-03
row4col3Stream     9.610697e-05  2.819636e-03
row4col3Trout     -1.935461e-03  2.270058e-03
row4col3Wolf       2.736143e-03  2.164997e-03
row4col4Bear       1.667520e-03 -6.277657e-03
row4col4Bee       -1.295554e-03 -4.534049e-02
row4col4Deer       9.883757e-05 -1.041393e-02
row4col4Dragonfly -1.969235e-03  3.747668e-02
row4col4Eagle     -4.116733e-04  1.855025e-02
row4col4Fox        8.728438e-04 -2.910751e-03
row4col4Meadow    -4.633006e-04 -1.279565e-01
row4col4Rabbit     3.175449e-05 -4.498959e-03
row4col4Stream    -7.954553e-05  1.200424e-01
row4col4Trout     -1.591449e-03  3.109297e-02
row4col4Wolf       3.139802e-03 -9.763951e-03
row4col5Bear       2.554251e-03 -1.217946e-02
row4col5Bee       -1.244892e-03 -2.987918e-02
row4col5Deer      -8.923392e-04 -9.173750e-03
row4col5Dragonfly -2.406837e-03  4.944505e-02
row4col5Eagle     -4.276275e-05  1.466232e-02
row4col5Fox        7.774360e-04  1.135927e-02
row4col5Meadow    -1.579138e-03 -1.188984e-01
row4col5Rabbit    -5.172192e-04  8.419076e-03
row4col5Stream     2.506626e-04  8.309288e-02
row4col5Trout     -6.438973e-04  1.526172e-02
row4col5Wolf       3.744735e-03 -1.210952e-02
score             -9.997436e-01  2.358008e-06

With score removed

Code
# No lowest
grids <- database_new %>%
  filter(score > 61) %>%
  select(-c(ID, pool))

dmy <- dummyVars(" ~ .", data = grids)

grids <- data.frame(predict(dmy, newdata = grids))
Code
grids_noscore <- grids %>%
  select(-c(score))

noscore_matrix <- as.matrix(grids_noscore)
Code
# write.csv(grids_noscore, here::here("grids_noscore.csv"), row.names = FALSE)
Code
set.seed(4)
km_spec2 <- k_means(num_clusters = 3)
grids_recipe <- recipe(~., data = noscore_matrix)

km_wflow2 <- workflow() |>
  add_recipe(grids_recipe) |>
  add_model(km_spec2)

km_fitted2 <- km_wflow2 |> fit(noscore_matrix)

km_fitted2 |> extract_centroids()
# A tibble: 3 × 221
  .cluster row1col1Bear row1col1Bee row1col1Deer row1col1Dragonfly row1col1Eagle
  <fct>           <dbl>       <dbl>        <dbl>             <dbl>         <dbl>
1 Cluster…       0.0693     0.0704        0.127             0.0144        0.0188
2 Cluster…       0.0530     0.00263       0.0878            0.142         0.0606
3 Cluster…       0.0612     0.0178        0.183             0.0536        0.0371
# ℹ 215 more variables: row1col1Fox <dbl>, row1col1Meadow <dbl>,
#   row1col1Rabbit <dbl>, row1col1Stream <dbl>, row1col1Trout <dbl>,
#   row1col1Wolf <dbl>, row1col2Bear <dbl>, row1col2Bee <dbl>,
#   row1col2Deer <dbl>, row1col2Dragonfly <dbl>, row1col2Eagle <dbl>,
#   row1col2Fox <dbl>, row1col2Meadow <dbl>, row1col2Rabbit <dbl>,
#   row1col2Stream <dbl>, row1col2Trout <dbl>, row1col2Wolf <dbl>,
#   row1col3Bear <dbl>, row1col3Bee <dbl>, row1col3Deer <dbl>, …
Code
grids_km2 <- kmeans(noscore_matrix, centers = 3)

grids_km2$totss
[1] 634714.3
Code
grids_km2$withinss
[1] 168666.4 262535.6 166322.3
Code
grids_km2$betweenss
[1] 37189.92
Code
set.seed(4)
km_spec2 <- k_means(num_clusters = 4)
grids_recipe <- recipe(~., data = noscore_matrix)

km_wflow2 <- workflow() |>
  add_recipe(grids_recipe) |>
  add_model(km_spec2)

km_fitted2 <- km_wflow2 |> fit(noscore_matrix)

km_fitted2 |> extract_centroids()
# A tibble: 4 × 221
  .cluster row1col1Bear row1col1Bee row1col1Deer row1col1Dragonfly row1col1Eagle
  <fct>           <dbl>       <dbl>        <dbl>             <dbl>         <dbl>
1 Cluster…       0.0637     0.114         0.0669           0.00231       0.00760
2 Cluster…       0.0742     0.0192        0.189            0.0397        0.0342 
3 Cluster…       0.0533     0.00310       0.0816           0.143         0.0616 
4 Cluster…       0.0585     0.00865       0.190            0.0613        0.0398 
# ℹ 215 more variables: row1col1Fox <dbl>, row1col1Meadow <dbl>,
#   row1col1Rabbit <dbl>, row1col1Stream <dbl>, row1col1Trout <dbl>,
#   row1col1Wolf <dbl>, row1col2Bear <dbl>, row1col2Bee <dbl>,
#   row1col2Deer <dbl>, row1col2Dragonfly <dbl>, row1col2Eagle <dbl>,
#   row1col2Fox <dbl>, row1col2Meadow <dbl>, row1col2Rabbit <dbl>,
#   row1col2Stream <dbl>, row1col2Trout <dbl>, row1col2Wolf <dbl>,
#   row1col3Bear <dbl>, row1col3Bee <dbl>, row1col3Deer <dbl>, …
Code
grids_km2 <- kmeans(noscore_matrix, centers = 4)

grids_km2$totss
[1] 634714.3
Code
grids_km2$withinss
[1] 147510.1 146907.5 147161.9 145439.2
Code
grids_km2$betweenss
[1] 47695.6
Code
set.seed(4)
km_spec2 <- k_means(num_clusters = 5)
grids_recipe <- recipe(~., data = noscore_matrix)

km_wflow2 <- workflow() |>
  add_recipe(grids_recipe) |>
  add_model(km_spec2)

km_fitted2 <- km_wflow2 |> fit(noscore_matrix)

km_fitted2 |> extract_centroids()
# A tibble: 5 × 221
  .cluster row1col1Bear row1col1Bee row1col1Deer row1col1Dragonfly row1col1Eagle
  <fct>           <dbl>       <dbl>        <dbl>             <dbl>         <dbl>
1 Cluster…       0.0624     0.116         0.0658           0.00180       0.00732
2 Cluster…       0.0749     0.0193        0.190            0.0386        0.0335 
3 Cluster…       0.0551     0.00358       0.0798           0.144         0.0626 
4 Cluster…       0.0856     0.00599       0.155            0.0837        0.0255 
5 Cluster…       0.0238     0.0116        0.202            0.0538        0.0612 
# ℹ 215 more variables: row1col1Fox <dbl>, row1col1Meadow <dbl>,
#   row1col1Rabbit <dbl>, row1col1Stream <dbl>, row1col1Trout <dbl>,
#   row1col1Wolf <dbl>, row1col2Bear <dbl>, row1col2Bee <dbl>,
#   row1col2Deer <dbl>, row1col2Dragonfly <dbl>, row1col2Eagle <dbl>,
#   row1col2Fox <dbl>, row1col2Meadow <dbl>, row1col2Rabbit <dbl>,
#   row1col2Stream <dbl>, row1col2Trout <dbl>, row1col2Wolf <dbl>,
#   row1col3Bear <dbl>, row1col3Bee <dbl>, row1col3Deer <dbl>, …
Code
grids_km2 <- kmeans(noscore_matrix, centers = 5)

grids_km2$totss
[1] 634714.3
Code
grids_km2$withinss
[1]  76104.60  96203.87 143008.40 124196.79 141839.81
Code
grids_km2$betweenss
[1] 53360.8
Code
set.seed(4)
km_spec2 <- k_means(num_clusters = 6)
grids_recipe <- recipe(~., data = noscore_matrix)

km_wflow2 <- workflow() |>
  add_recipe(grids_recipe) |>
  add_model(km_spec2)

km_fitted2 <- km_wflow2 |> fit(noscore_matrix)

km_fitted2 |> extract_centroids()
# A tibble: 6 × 221
  .cluster row1col1Bear row1col1Bee row1col1Deer row1col1Dragonfly row1col1Eagle
  <fct>           <dbl>       <dbl>        <dbl>             <dbl>         <dbl>
1 Cluster…       0.0606     0.118         0.0626           0.00174       0.00741
2 Cluster…       0.0772     0.0195        0.192            0.0391        0.0325 
3 Cluster…       0.0649     0.00154       0.0890           0.158         0.0254 
4 Cluster…       0.0906     0.00749       0.167            0.0705        0.0294 
5 Cluster…       0.0375     0.00441       0.0827           0.123         0.0915 
6 Cluster…       0.0263     0.0137        0.210            0.0493        0.0476 
# ℹ 215 more variables: row1col1Fox <dbl>, row1col1Meadow <dbl>,
#   row1col1Rabbit <dbl>, row1col1Stream <dbl>, row1col1Trout <dbl>,
#   row1col1Wolf <dbl>, row1col2Bear <dbl>, row1col2Bee <dbl>,
#   row1col2Deer <dbl>, row1col2Dragonfly <dbl>, row1col2Eagle <dbl>,
#   row1col2Fox <dbl>, row1col2Meadow <dbl>, row1col2Rabbit <dbl>,
#   row1col2Stream <dbl>, row1col2Trout <dbl>, row1col2Wolf <dbl>,
#   row1col3Bear <dbl>, row1col3Bee <dbl>, row1col3Deer <dbl>, …
Code
grids_km2 <- kmeans(noscore_matrix, centers = 6)

grids_km2$totss
[1] 634714.3
Code
grids_km2$withinss
[1] 140257.59  85086.88 138441.52  88224.11  63939.32  60330.98
Code
grids_km2$betweenss
[1] 58433.85
Code
set.seed(4)
km_spec2 <- k_means(num_clusters = 7)
grids_recipe <- recipe(~., data = noscore_matrix)

km_wflow2 <- workflow() |>
  add_recipe(grids_recipe) |>
  add_model(km_spec2)

km_fitted2 <- km_wflow2 |> fit(noscore_matrix)

km_fitted2 |> extract_centroids()
# A tibble: 7 × 221
  .cluster row1col1Bear row1col1Bee row1col1Deer row1col1Dragonfly row1col1Eagle
  <fct>           <dbl>       <dbl>        <dbl>             <dbl>         <dbl>
1 Cluster…      0.00924     0.162         0.0645           0.00250       0.00475
2 Cluster…      0.102       0.0107        0.167            0.0437        0.0199 
3 Cluster…      0.119       0.0696        0.0698           0.00440       0.00880
4 Cluster…      0.0630      0.00131       0.0884           0.152         0.0256 
5 Cluster…      0.0620      0.0100        0.205            0.0532        0.0389 
6 Cluster…      0.0390      0.00363       0.0852           0.122         0.0871 
7 Cluster…      0.0438      0.0297        0.203            0.0433        0.0457 
# ℹ 215 more variables: row1col1Fox <dbl>, row1col1Meadow <dbl>,
#   row1col1Rabbit <dbl>, row1col1Stream <dbl>, row1col1Trout <dbl>,
#   row1col1Wolf <dbl>, row1col2Bear <dbl>, row1col2Bee <dbl>,
#   row1col2Deer <dbl>, row1col2Dragonfly <dbl>, row1col2Eagle <dbl>,
#   row1col2Fox <dbl>, row1col2Meadow <dbl>, row1col2Rabbit <dbl>,
#   row1col2Stream <dbl>, row1col2Trout <dbl>, row1col2Wolf <dbl>,
#   row1col3Bear <dbl>, row1col3Bee <dbl>, row1col3Deer <dbl>, …
Code
grids_km2 <- kmeans(noscore_matrix, centers = 7)

grids_km2$totss
[1] 634714.3
Code
grids_km2$withinss
[1]  72114.74 102254.12 113256.43  72311.59  61848.85  87841.65  62295.00
Code
grids_km2$betweenss
[1] 62791.89
Code
km_fitted2 %>% extract_cluster_assignment()
# A tibble: 36,066 × 1
   .cluster 
   <fct>    
 1 Cluster_1
 2 Cluster_2
 3 Cluster_3
 4 Cluster_2
 5 Cluster_4
 6 Cluster_2
 7 Cluster_2
 8 Cluster_5
 9 Cluster_6
10 Cluster_2
# ℹ 36,056 more rows
Code
database_lowest_removed <- database_new %>%
  filter(score > 61)
Code
database1 <- database_lowest_removed %>%
  mutate(cluster = extract_cluster_assignment(km_fitted2)$.cluster)
Code
database1 %>%
  group_by(cluster) %>%
  summarise(mean_score=mean(score),
            sd_score=sd(score),
            count=n(),
            .groups = 'drop')
# A tibble: 7 × 4
  cluster   mean_score sd_score count
  <fct>          <dbl>    <dbl> <int>
1 Cluster_1       70.4     5.76  4003
2 Cluster_2       70.4     5.88  3822
3 Cluster_3       69.9     5.58  5227
4 Cluster_4       70.2     5.71  4571
5 Cluster_5       70.2     5.75  7293
6 Cluster_6       70.0     5.70  5788
7 Cluster_7       69.9     5.71  5362

With individual scores and lowest removed

Code
summary(database_new$score)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  17.00   61.00   67.00   65.79   72.00  116.00 
Code
database_new3 <- database_new %>%
  filter(score > 61) %>%
  select(-c(ID,pool,score))

ID_cols <- database_new %>%
  filter(score > 61) %>%
  select(ID)
Code
# Run once

first_row <- score_grid(matrix(c(t(database_new3[1,])),nrow=4,ncol=5,byrow=T), individual=TRUE)

db_individual <- data.frame(
  bear_score = c(first_row[1]),
  bee_score = c(first_row[2]),
  meadow_score = c(first_row[3]),
  trout_score = c(first_row[4]),
  eagle_score = c(first_row[5]),
  rabbit_score = c(first_row[6]),
  dragonfly_score = c(first_row[7]),
  fox_score = c(first_row[8]),
  deer_score = c(first_row[9]),
  stream_score = c(first_row[10]),
  wolves_score = c(first_row[11]),
  dv_score = c(first_row[12])
)

for(i in 2:nrow(database_new3)){
  row <- as.list(score_grid(matrix(c(t(database_new3[i,])),nrow=4,ncol=5,byrow=T), individual=TRUE))
  db_individual <- rbind(db_individual, row)
}
Code
# db_individual_ID <- db_individual %>%
#   mutate(ID = ID_cols$ID) %>%
#   select(ID, 1:12)
# 
# write.csv(db_individual_ID, here::here("db_individual_lowest_removed.csv"), row.names = FALSE)
Code
db_individual_removed <- read.csv(here::here("db_individual_lowest_removed.csv"))

db_individual_removed <- db_individual_removed %>%
  select(-ID)

individual_matrix <- as.matrix(db_individual_removed)
Code
set.seed(4)
km_spec3 <- k_means(num_clusters = 3)
grids_recipe <- recipe(~., data = individual_matrix)

km_wflow3 <- workflow() |>
  add_recipe(grids_recipe) |>
  add_model(km_spec3)

km_fitted3 <- km_wflow3 |> fit(individual_matrix)

km_fitted3 |> extract_centroids()
# A tibble: 3 × 13
  .cluster  bear_score bee_score meadow_score trout_score eagle_score
  <fct>          <dbl>     <dbl>        <dbl>       <dbl>       <dbl>
1 Cluster_1       6.81      5.21         5.33        6.21        7.83
2 Cluster_2       4.04      4.24         4.82        6.09        3.25
3 Cluster_3       5.27     13.5         13.1         4.24        3.40
# ℹ 7 more variables: rabbit_score <dbl>, dragonfly_score <dbl>,
#   fox_score <dbl>, deer_score <dbl>, stream_score <dbl>, wolves_score <dbl>,
#   dv_score <dbl>
Code
grids_km3 <- kmeans(individual_matrix, centers = 3)

grids_km3$totss
[1] 9516091
Code
grids_km3$withinss
[1] 2798274 1538188 1638007
Code
grids_km3$betweenss
[1] 3541622
Code
set.seed(4)
km_spec3 <- k_means(num_clusters = 4)
grids_recipe <- recipe(~., data = individual_matrix)

km_wflow3 <- workflow() |>
  add_recipe(grids_recipe) |>
  add_model(km_spec3)

km_fitted3 <- km_wflow3 |> fit(individual_matrix)

km_fitted3 |> extract_centroids()
# A tibble: 4 × 13
  .cluster  bear_score bee_score meadow_score trout_score eagle_score
  <fct>          <dbl>     <dbl>        <dbl>       <dbl>       <dbl>
1 Cluster_1       5.78     13.2         12.7         4.22        3.11
2 Cluster_2       5.57      5.25         5.48        6.06        3.44
3 Cluster_3       6.85      5.11         5.46        6.41       11.8 
4 Cluster_4       3.39      3.75         4.29        6.05        2.98
# ℹ 7 more variables: rabbit_score <dbl>, dragonfly_score <dbl>,
#   fox_score <dbl>, deer_score <dbl>, stream_score <dbl>, wolves_score <dbl>,
#   dv_score <dbl>
Code
grids_km3 <- kmeans(individual_matrix, centers = 4)

grids_km3$totss
[1] 9516091
Code
grids_km3$withinss
[1] 1434699 1606572 1554870  761314
Code
grids_km3$betweenss
[1] 4158636
Code
set.seed(4)
km_spec3 <- k_means(num_clusters = 5)
grids_recipe <- recipe(~., data = individual_matrix)

km_wflow3 <- workflow() |>
  add_recipe(grids_recipe) |>
  add_model(km_spec3)

km_fitted3 <- km_wflow3 |> fit(individual_matrix)

km_fitted3 |> extract_centroids()
# A tibble: 5 × 13
  .cluster  bear_score bee_score meadow_score trout_score eagle_score
  <fct>          <dbl>     <dbl>        <dbl>       <dbl>       <dbl>
1 Cluster_1       9.50      7.46         5.14        5.61        3.90
2 Cluster_2       4.58      4.97         5.86        6.17        3.71
3 Cluster_3       4.75     14.0         14.1         4.11        3.37
4 Cluster_4       5.28      4.37         6.04        6.51       14.3 
5 Cluster_5       3.40      3.74         4.28        6.04        2.98
# ℹ 7 more variables: rabbit_score <dbl>, dragonfly_score <dbl>,
#   fox_score <dbl>, deer_score <dbl>, stream_score <dbl>, wolves_score <dbl>,
#   dv_score <dbl>
Code
grids_km3 <- kmeans(individual_matrix, centers = 5)

grids_km3$totss
[1] 9516091
Code
grids_km3$withinss
[1] 1136207.9 1126869.3  758003.9 1006652.1  927066.5
Code
grids_km3$betweenss
[1] 4561291
Code
set.seed(4)
km_spec3 <- k_means(num_clusters = 6)
grids_recipe <- recipe(~., data = individual_matrix)

km_wflow3 <- workflow() |>
  add_recipe(grids_recipe) |>
  add_model(km_spec3)

km_fitted3 <- km_wflow3 |> fit(individual_matrix)

km_fitted3 |> extract_centroids()
# A tibble: 6 × 13
  .cluster  bear_score bee_score meadow_score trout_score eagle_score
  <fct>          <dbl>     <dbl>        <dbl>       <dbl>       <dbl>
1 Cluster_1      11.6       8.54         4.67        5.89        4.35
2 Cluster_2       4.51      4.90         5.29        6.24        3.70
3 Cluster_3       4.61     14.1         14.1         4.13        3.39
4 Cluster_4       5.31      4.59         5.61        6.50       15.2 
5 Cluster_5       4.35      4.61         6.91        5.63        3.96
6 Cluster_6       3.03      3.25         3.81        5.99        2.61
# ℹ 7 more variables: rabbit_score <dbl>, dragonfly_score <dbl>,
#   fox_score <dbl>, deer_score <dbl>, stream_score <dbl>, wolves_score <dbl>,
#   dv_score <dbl>
Code
grids_km3 <- kmeans(individual_matrix, centers = 6)

grids_km3$totss
[1] 9516091
Code
grids_km3$withinss
[1]  759440.3  409600.7  747652.9  905431.2  778196.0 1082726.5
Code
grids_km3$betweenss
[1] 4833044
Code
set.seed(4)
km_spec3 <- k_means(num_clusters = 7)
grids_recipe <- recipe(~., data = individual_matrix)

km_wflow3 <- workflow() |>
  add_recipe(grids_recipe) |>
  add_model(km_spec3)

km_fitted3 <- km_wflow3 |> fit(individual_matrix)

km_fitted3 |> extract_centroids()
# A tibble: 7 × 13
  .cluster  bear_score bee_score meadow_score trout_score eagle_score
  <fct>          <dbl>     <dbl>        <dbl>       <dbl>       <dbl>
1 Cluster_1      12.4       7.96         5.03        5.96        4.35
2 Cluster_2       4.54      4.79         5.34        6.24        3.69
3 Cluster_3       4.95     18.9         12.9         3.75        3.14
4 Cluster_4       4.40      8.61        13.9         4.75        4.01
5 Cluster_5       5.36      4.58         5.36        6.51       15.4 
6 Cluster_6       4.68      5.33         4.63        5.78        4.02
7 Cluster_7       3.03      3.24         3.80        6.00        2.61
# ℹ 7 more variables: rabbit_score <dbl>, dragonfly_score <dbl>,
#   fox_score <dbl>, deer_score <dbl>, stream_score <dbl>, wolves_score <dbl>,
#   dv_score <dbl>
Code
grids_km3 <- kmeans(individual_matrix, centers = 7)

grids_km3$totss
[1] 9516091
Code
grids_km3$withinss
[1] 409399.1 641115.4 720996.2 587674.1 739842.3 869105.4 518338.6
Code
grids_km3$betweenss
[1] 5029620
Code
centroids <- km_fitted3 |> extract_centroids()
centroids
# A tibble: 7 × 13
  .cluster  bear_score bee_score meadow_score trout_score eagle_score
  <fct>          <dbl>     <dbl>        <dbl>       <dbl>       <dbl>
1 Cluster_1      12.4       7.96         5.03        5.96        4.35
2 Cluster_2       4.54      4.79         5.34        6.24        3.69
3 Cluster_3       4.95     18.9         12.9         3.75        3.14
4 Cluster_4       4.40      8.61        13.9         4.75        4.01
5 Cluster_5       5.36      4.58         5.36        6.51       15.4 
6 Cluster_6       4.68      5.33         4.63        5.78        4.02
7 Cluster_7       3.03      3.24         3.80        6.00        2.61
# ℹ 7 more variables: rabbit_score <dbl>, dragonfly_score <dbl>,
#   fox_score <dbl>, deer_score <dbl>, stream_score <dbl>, wolves_score <dbl>,
#   dv_score <dbl>
Code
database_lowest_removed <- database_new %>%
  filter(score > 61)
Code
database3 <- database_lowest_removed %>%
  mutate(cluster = extract_cluster_assignment(km_fitted3)$.cluster)
Code
database3 %>%
  group_by(cluster) %>%
  summarize(mean_score=mean(score),
            sd_score=sd(score),
            p25th=quantile(score,probs=0.25),
            median=quantile(score,probs=0.5),
            p75th=quantile(score,probs=0.75),
            count=n(),
            .groups = 'drop')
# A tibble: 7 × 7
  cluster   mean_score sd_score p25th median p75th count
  <fct>          <dbl>    <dbl> <dbl>  <dbl> <dbl> <int>
1 Cluster_1       67.9     4.07    65     68    71  5086
2 Cluster_2       71.6     5.49    67     71    76  6745
3 Cluster_3       71.8     5.05    68     72    75  3806
4 Cluster_4       69.2     4.65    65     69    73  6752
5 Cluster_5       69.7     4.66    66     69    73  5252
6 Cluster_6       67.1     3.64    64     67    70  6127
7 Cluster_7       79.4     8.09    74     79    85  2298

Normalizing

Code
db_individual_removed <- read.csv(here::here("db_individual_lowest_removed.csv"))

db_individual_removed <- db_individual_removed %>%
  select(-ID)
Code
db_scaled <- data.frame(lapply(db_individual_removed,scale))
Code
individual_matrix <- as.matrix(db_scaled)
Code
set.seed(4)
km_spec3 <- k_means(num_clusters = 3)
grids_recipe <- recipe(~., data = individual_matrix)

km_wflow3 <- workflow() |>
  add_recipe(grids_recipe) |>
  add_model(km_spec3)

km_fitted3 <- km_wflow3 |> fit(individual_matrix)

km_fitted3 |> extract_centroids()
# A tibble: 3 × 13
  .cluster  bear_score bee_score meadow_score trout_score eagle_score
  <fct>          <dbl>     <dbl>        <dbl>       <dbl>       <dbl>
1 Cluster_1     0.0892     0.747        0.734      -0.351      -0.476
2 Cluster_2    -0.317     -0.546       -0.474       0.288      -0.450
3 Cluster_3     0.156     -0.382       -0.425       0.155       0.880
# ℹ 7 more variables: rabbit_score <dbl>, dragonfly_score <dbl>,
#   fox_score <dbl>, deer_score <dbl>, stream_score <dbl>, wolves_score <dbl>,
#   dv_score <dbl>
Code
grids_km3 <- kmeans(individual_matrix, centers = 3)

grids_km3$totss
[1] 432780
Code
grids_km3$withinss
[1] 115486.2 125410.6 101487.0
Code
grids_km3$betweenss
[1] 90396.15
Code
set.seed(4)
km_spec3 <- k_means(num_clusters = 4)
grids_recipe <- recipe(~., data = individual_matrix)

km_wflow3 <- workflow() |>
  add_recipe(grids_recipe) |>
  add_model(km_spec3)

km_fitted3 <- km_wflow3 |> fit(individual_matrix)

km_fitted3 |> extract_centroids()
# A tibble: 4 × 13
  .cluster  bear_score bee_score meadow_score trout_score eagle_score
  <fct>          <dbl>     <dbl>        <dbl>       <dbl>       <dbl>
1 Cluster_1      0.565    -0.186       -0.412      0.0645      -0.297
2 Cluster_2     -0.196     1.08         1.17      -0.422       -0.418
3 Cluster_3     -0.158    -0.378       -0.286      0.0755       1.29 
4 Cluster_4     -0.464    -0.572       -0.433      0.312       -0.447
# ℹ 7 more variables: rabbit_score <dbl>, dragonfly_score <dbl>,
#   fox_score <dbl>, deer_score <dbl>, stream_score <dbl>, wolves_score <dbl>,
#   dv_score <dbl>
Code
grids_km3 <- kmeans(individual_matrix, centers = 4)

grids_km3$totss
[1] 432780
Code
grids_km3$withinss
[1] 72386.03 92085.26 76858.79 77842.34
Code
grids_km3$betweenss
[1] 113607.6
Code
set.seed(4)
km_spec3 <- k_means(num_clusters = 5)
grids_recipe <- recipe(~., data = individual_matrix)

km_wflow3 <- workflow() |>
  add_recipe(grids_recipe) |>
  add_model(km_spec3)

km_fitted3 <- km_wflow3 |> fit(individual_matrix)

km_fitted3 |> extract_centroids()
# A tibble: 5 × 13
  .cluster  bear_score bee_score meadow_score trout_score eagle_score
  <fct>          <dbl>     <dbl>        <dbl>       <dbl>       <dbl>
1 Cluster_1      1.48    0.00374       -0.374      0.197       -0.124
2 Cluster_2     -0.312  -0.322         -0.306     -0.0824      -0.326
3 Cluster_3     -0.277   1.14           1.22      -0.431       -0.415
4 Cluster_4     -0.204  -0.390         -0.276      0.105        1.44 
5 Cluster_5     -0.451  -0.582         -0.459      0.353       -0.442
# ℹ 7 more variables: rabbit_score <dbl>, dragonfly_score <dbl>,
#   fox_score <dbl>, deer_score <dbl>, stream_score <dbl>, wolves_score <dbl>,
#   dv_score <dbl>
Code
grids_km3 <- kmeans(individual_matrix, centers = 5)

grids_km3$totss
[1] 432780
Code
grids_km3$withinss
[1] 56580.06 70998.24 43048.48 72480.97 60870.76
Code
grids_km3$betweenss
[1] 128801.5
Code
set.seed(4)
km_spec3 <- k_means(num_clusters = 6)
grids_recipe <- recipe(~., data = individual_matrix)

km_wflow3 <- workflow() |>
  add_recipe(grids_recipe) |>
  add_model(km_spec3)

km_fitted3 <- km_wflow3 |> fit(individual_matrix)

km_fitted3 |> extract_centroids()
# A tibble: 6 × 13
  .cluster  bear_score bee_score meadow_score trout_score eagle_score
  <fct>          <dbl>     <dbl>        <dbl>       <dbl>       <dbl>
1 Cluster_1      1.55     0.0217       -0.355      0.187       -0.127
2 Cluster_2     -0.336   -0.410        -0.434      0.235       -0.248
3 Cluster_3     -0.271    1.17          1.24      -0.434       -0.401
4 Cluster_4     -0.174   -0.385        -0.283      0.0957       1.56 
5 Cluster_5     -0.250   -0.250        -0.217     -0.156       -0.275
6 Cluster_6     -0.479   -0.645        -0.361      0.335       -0.590
# ℹ 7 more variables: rabbit_score <dbl>, dragonfly_score <dbl>,
#   fox_score <dbl>, deer_score <dbl>, stream_score <dbl>, wolves_score <dbl>,
#   dv_score <dbl>
Code
grids_km3 <- kmeans(individual_matrix, centers = 6)

grids_km3$totss
[1] 432780
Code
grids_km3$withinss
[1] 45579.53 38851.64 45487.45 52669.82 45162.10 63515.07
Code
grids_km3$betweenss
[1] 141514.4
Code
set.seed(4)
km_spec3 <- k_means(num_clusters = 7)
grids_recipe <- recipe(~., data = individual_matrix)

km_wflow3 <- workflow() |>
  add_recipe(grids_recipe) |>
  add_model(km_spec3)

km_fitted3 <- km_wflow3 |> fit(individual_matrix)

km_fitted3 |> extract_centroids()
# A tibble: 7 × 13
  .cluster  bear_score bee_score meadow_score trout_score eagle_score
  <fct>          <dbl>     <dbl>        <dbl>       <dbl>       <dbl>
1 Cluster_1      1.66     0.0356       -0.343      0.167       -0.109
2 Cluster_2     -0.152   -0.160        -0.113     -0.0776      -0.204
3 Cluster_3     -0.260    1.26          1.27      -0.453       -0.405
4 Cluster_4     -0.173   -0.382        -0.281      0.0888       1.63 
5 Cluster_5     -0.224   -0.219        -0.162     -0.109       -0.240
6 Cluster_6     -0.333   -0.417        -0.451      0.275       -0.249
7 Cluster_7     -0.487   -0.663        -0.389      0.343       -0.593
# ℹ 7 more variables: rabbit_score <dbl>, dragonfly_score <dbl>,
#   fox_score <dbl>, deer_score <dbl>, stream_score <dbl>, wolves_score <dbl>,
#   dv_score <dbl>
Code
grids_km3 <- kmeans(individual_matrix, centers = 7)

grids_km3$totss
[1] 432780
Code
grids_km3$withinss
[1] 34441.26 44327.60 48032.11 39527.74 28056.57 43783.01 42281.56
Code
grids_km3$betweenss
[1] 152330.2
Code
centroids <- km_fitted3 |> extract_centroids()
centroids
# A tibble: 7 × 13
  .cluster  bear_score bee_score meadow_score trout_score eagle_score
  <fct>          <dbl>     <dbl>        <dbl>       <dbl>       <dbl>
1 Cluster_1      1.66     0.0356       -0.343      0.167       -0.109
2 Cluster_2     -0.152   -0.160        -0.113     -0.0776      -0.204
3 Cluster_3     -0.260    1.26          1.27      -0.453       -0.405
4 Cluster_4     -0.173   -0.382        -0.281      0.0888       1.63 
5 Cluster_5     -0.224   -0.219        -0.162     -0.109       -0.240
6 Cluster_6     -0.333   -0.417        -0.451      0.275       -0.249
7 Cluster_7     -0.487   -0.663        -0.389      0.343       -0.593
# ℹ 7 more variables: rabbit_score <dbl>, dragonfly_score <dbl>,
#   fox_score <dbl>, deer_score <dbl>, stream_score <dbl>, wolves_score <dbl>,
#   dv_score <dbl>
Code
database_new %>% summarize(mean_score=mean(score),
            sd_score=sd(score),
            min=quantile(score, probs=0),
            p25th=quantile(score,probs=0.25),
            median=quantile(score,probs=0.5),
            p75th=quantile(score,probs=0.75),
            max=quantile(score, probs=1)
            )
  mean_score sd_score min p25th median p75th max
1   65.79404 9.135006  17    61     67    72 116
Code
database_lowest_removed <- database_new %>%
  filter(score > 61)
Code
database3 <- database_lowest_removed %>%
  mutate(`7cluster` = extract_cluster_assignment(km_fitted3)$.cluster)
Code
database3 <- database3 %>%
  mutate(`3cluster` = extract_cluster_assignment(km_fitted3)$.cluster)
Code
# write.csv(database3, here::here("normalized_clusters.csv"), row.names = FALSE)
Code
cluster_data <- read.csv(here::here("normalized_clusters.csv"))
Code
cluster_data %>%
  group_by(X7cluster) %>%
  summarize(mean_score=mean(score),
            sd_score=sd(score),
            p25th=quantile(score,probs=0.25),
            median=quantile(score,probs=0.5),
            p75th=quantile(score,probs=0.75),
            count=n(),
            .groups = 'drop')
# A tibble: 7 × 7
  X7cluster mean_score sd_score p25th median p75th count
  <chr>          <dbl>    <dbl> <dbl>  <dbl> <dbl> <int>
1 Cluster_1       68.8     4.44    65     68    72  4887
2 Cluster_2       67.9     4.10    65     67    71  4291
3 Cluster_3       70.9     5.06    67     71    75  6671
4 Cluster_4       70.0     4.80    66     70    73  5405
5 Cluster_5       67.8     4.12    64     67    71  5800
6 Cluster_6       73.2     6.73    68     73    78  5870
7 Cluster_7       72.4     8.29    66     71    77  3142

Inferences

Code
count_prop <- function(card_name, database){
  df_pos <- data.frame(
    row1col1 = as.numeric(nrow(database %>% filter(row1col1 == card_name))),
    row1col2 = as.numeric(nrow(database %>% filter(row1col2 == card_name))),
    row1col3 = as.numeric(nrow(database %>% filter(row1col3 == card_name))),
    row1col4 = as.numeric(nrow(database %>% filter(row1col4 == card_name))),
    row1col5 = as.numeric(nrow(database %>% filter(row1col5 == card_name))),
    row2col1 = as.numeric(nrow(database %>% filter(row2col1 == card_name))),
    row2col2 = as.numeric(nrow(database %>% filter(row2col2 == card_name))),
    row2col3 = as.numeric(nrow(database %>% filter(row2col3 == card_name))),
    row2col4 = as.numeric(nrow(database %>% filter(row2col4 == card_name))),
    row2col5 = as.numeric(nrow(database %>% filter(row2col5 == card_name))),
    row3col1 = as.numeric(nrow(database %>% filter(row3col1 == card_name))),
    row3col2 = as.numeric(nrow(database %>% filter(row3col2 == card_name))),
    row3col3 = as.numeric(nrow(database %>% filter(row3col3 == card_name))),
    row3col4 = as.numeric(nrow(database %>% filter(row3col4 == card_name))),
    row3col5 = as.numeric(nrow(database %>% filter(row3col5 == card_name))),
    row4col1 = as.numeric(nrow(database %>% filter(row4col1 == card_name))),
    row4col2 = as.numeric(nrow(database %>% filter(row4col2 == card_name))),
    row4col3 = as.numeric(nrow(database %>% filter(row4col3 == card_name))),
    row4col4 = as.numeric(nrow(database %>% filter(row4col4 == card_name))),
    row4col5 = as.numeric(nrow(database %>% filter(row4col5 == card_name)))
  )
  
  df_pos_per <- apply(df_pos, 1, function(x) x/sum(x))

  row <- c("row1", "row2", "row3", "row4")
  col <- c("col1", "col2", "col3", "col4", "col5")
  df_hm <- expand.grid(col = col, row = row)
  df_hm <- df_hm %>%
    mutate(proportion = df_pos_per[,1])
  
  return(df_hm)
  
}
Code
# bear_pos <- data.frame(
#   row1col1 = as.numeric(nrow(database_new %>% filter(row1col1 == "Bear"))),
#   row1col2 = as.numeric(nrow(database_new %>% filter(row1col2 == "Bear"))),
#   row1col3 = as.numeric(nrow(database_new %>% filter(row1col3 == "Bear"))),
#   row1col4 = as.numeric(nrow(database_new %>% filter(row1col4 == "Bear"))),
#   row1col5 = as.numeric(nrow(database_new %>% filter(row1col5 == "Bear"))),
#   row2col1 = as.numeric(nrow(database_new %>% filter(row2col1 == "Bear"))),
#   row2col2 = as.numeric(nrow(database_new %>% filter(row2col2 == "Bear"))),
#   row2col3 = as.numeric(nrow(database_new %>% filter(row2col3 == "Bear"))),
#   row2col4 = as.numeric(nrow(database_new %>% filter(row2col4 == "Bear"))),
#   row2col5 = as.numeric(nrow(database_new %>% filter(row2col5 == "Bear"))),
#   row3col1 = as.numeric(nrow(database_new %>% filter(row3col1 == "Bear"))),
#   row3col2 = as.numeric(nrow(database_new %>% filter(row3col2 == "Bear"))),
#   row3col3 = as.numeric(nrow(database_new %>% filter(row3col3 == "Bear"))),
#   row3col4 = as.numeric(nrow(database_new %>% filter(row3col4 == "Bear"))),
#   row3col5 = as.numeric(nrow(database_new %>% filter(row3col5 == "Bear"))),
#   row4col1 = as.numeric(nrow(database_new %>% filter(row4col1 == "Bear"))),
#   row4col2 = as.numeric(nrow(database_new %>% filter(row4col2 == "Bear"))),
#   row4col3 = as.numeric(nrow(database_new %>% filter(row4col3 == "Bear"))),
#   row4col4 = as.numeric(nrow(database_new %>% filter(row4col4 == "Bear"))),
#   row4col5 = as.numeric(nrow(database_new %>% filter(row4col5 == "Bear")))
# )
# 
# bear_pos_per <- apply(bear_pos, 1, function(x) x/sum(x))
# 
# row <- c("row1", "row2", "row3", "row4")
# col <- c("col1", "col2", "col3", "col4", "col5")
# bear_hm <- expand.grid(col = col, row = row)
# bear_hm <- bear_hm %>%
#   mutate(proportion = bear_pos_per[,1])
Code
make_heatmap <- function(df, title){
  ggplot(aes(x=col, y=row, fill=proportion), data=df) +
  geom_tile() +
  theme_minimal() +
  scale_fill_gradient(low="#F0F0F0", high="#006837") +
  labs(title=title)
}
Code
make_heatmap(count_prop("Bear", database_new), "Bear")

Code
make_heatmap(count_prop("Bee", database_new), "Bee")

Code
make_heatmap(count_prop("Meadow", database_new), "Meadow")

Code
make_heatmap(count_prop("Trout", database_new), "Trout")

Code
make_heatmap(count_prop("Eagle", database_new), "Eagle")

Code
make_heatmap(count_prop("Rabbit", database_new), "Rabbit")

Code
make_heatmap(count_prop("Dragonfly", database_new), "Dragonfly")

Code
make_heatmap(count_prop("Fox", database_new), "Fox")

Code
make_heatmap(count_prop("Deer", database_new), "Deer")

Code
make_heatmap(count_prop("Stream", database_new), "Stream")

Code
make_heatmap(count_prop("Wolf", database_new), "Wolf")

Code
bind_7clusters <- function(cluster_num){
  rbind(count_prop("Bear", 
           df <- cluster_data %>%
              filter(X7cluster == cluster_num)) %>% mutate(cluster = cluster_num, card = "Bear"),
        count_prop("Bee", 
           df <- cluster_data %>%
              filter(X7cluster == cluster_num)) %>% mutate(cluster = cluster_num, card = "Bee"),
        count_prop("Meadow", 
           df <- cluster_data %>%
              filter(X7cluster == cluster_num)) %>% mutate(cluster = cluster_num, card = "Meadow"),
        count_prop("Trout", 
           df <- cluster_data %>%
              filter(X7cluster == cluster_num)) %>% mutate(cluster = cluster_num, card = "Trout"),
        count_prop("Eagle", 
           df <- cluster_data %>%
              filter(X7cluster == cluster_num)) %>% mutate(cluster = cluster_num, card = "Eagle"),
        count_prop("Rabbit", 
           df <- cluster_data %>%
              filter(X7cluster == cluster_num)) %>% mutate(cluster = cluster_num, card = "Rabbit"),
        count_prop("Dragonfly", 
           df <- cluster_data %>%
              filter(X7cluster == cluster_num)) %>% mutate(cluster = cluster_num, card = "Dragonfly"),
        count_prop("Fox", 
           df <- cluster_data %>%
              filter(X7cluster == cluster_num)) %>% mutate(cluster = cluster_num, card = "Fox"),
        count_prop("Deer", 
           df <- cluster_data %>%
              filter(X7cluster == cluster_num)) %>% mutate(cluster = cluster_num, card = "Deer"),
        count_prop("Stream", 
           df <- cluster_data %>%
              filter(X7cluster == cluster_num)) %>% mutate(cluster = cluster_num, card = "Stream"),
        count_prop("Wolf", 
           df <- cluster_data %>%
              filter(X7cluster == cluster_num)) %>% mutate(cluster = cluster_num, card = "Wolf")
  )
}
Code
cluster_position <- rbind(bind_7clusters("Cluster_1"),
                          bind_7clusters("Cluster_2"),
                          bind_7clusters("Cluster_3"),
                          bind_7clusters("Cluster_4"),
                          bind_7clusters("Cluster_5"),
                          bind_7clusters("Cluster_6"),
                          bind_7clusters("Cluster_7")
                    )
Code
ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position %>% filter(card == "Bear"))) +
  geom_tile() +
  theme_minimal() +
  scale_fill_gradient(low="#F0F0F0", high="#006837") +
  facet_wrap(vars(cluster)) +
  labs(title="Bear")

Code
overall_position <- rbind(count_prop("Bear", database_new) %>% mutate(card = "Bear"),
                          count_prop("Bee", database_new) %>% mutate(card = "Bee"),
                          count_prop("Meadow", database_new) %>% mutate(card = "Meadow"),
                          count_prop("Trout", database_new) %>% mutate(card = "Trout"),
                          count_prop("Eagle", database_new) %>% mutate(card = "Eagle"),
                          count_prop("Rabbit", database_new) %>% mutate(card = "Rabbit"),
                          count_prop("Dragonfly", database_new) %>% mutate(card = "Dragonfly"),
                          count_prop("Fox", database_new) %>% mutate(card = "Fox"),
                          count_prop("Deer", database_new) %>% mutate(card = "Deer"),
                          count_prop("Stream", database_new) %>% mutate(card = "Stream"),
                          count_prop("Wolf", database_new) %>% mutate(card = "Wolf")
                          )
Code
ggplot(aes(x=col, y=row, fill=proportion), data=(overall_position)) +
  geom_tile() +
  theme_minimal() +
  scale_fill_gradient(low="#F0F0F0", high="#006837") +
  facet_wrap(vars(card)) +
  labs(title="Bear positions across clusters")

Cluster Inferences

Code
cluster_table <- function(Xcluster, cluster_num, cluster_data){
  df <- cluster_data %>%
    filter(
      case_when(
        Xcluster == 7 ~ X7cluster == cluster_num,
        Xcluster == 6 ~ X6cluster == cluster_num,
        Xcluster == 5 ~ X5cluster == cluster_num,
        Xcluster == 4 ~ X4cluster == cluster_num,
        Xcluster == 3 ~ X3cluster == cluster_num
      )
    ) %>%
    summarize(mean_score=mean(score),
              sd_score=sd(score),
              p25th=quantile(score,probs=0.25),
              median=quantile(score,probs=0.5),
              p75th=quantile(score,probs=0.75),
              count=n(),
              .groups = 'drop')
  return(df)
}
Code
cluster_prop <- function(Xcluster, cluster_num, database){
  df <- database %>%
    filter(
      case_when(
        Xcluster == 7 ~ X7cluster == cluster_num,
        Xcluster == 6 ~ X6cluster == cluster_num,
        Xcluster == 5 ~ X5cluster == cluster_num,
        Xcluster == 4 ~ X4cluster == cluster_num,
        Xcluster == 3 ~ X3cluster == cluster_num
      )
    )
  
  bear_count = 0
  bee_count = 0
  meadow_count = 0
  trout_count = 0
  eagle_count = 0
  rabbit_count = 0
  dragonfly_count = 0
  fox_count = 0
  deer_count = 0
  stream_count = 0
  wolf_count = 0
      
  for (j in 2:21){
    for(i in 1:nrow(df)){
      
      if(df[i,j] == "Bear"){
        bear_count = bear_count + 1
      }else if(df[i,j] == "Bee"){
        bee_count = bee_count + 1
      }else if(df[i,j] == "Meadow"){
        meadow_count = meadow_count + 1
      }else if(df[i,j] == "Trout"){
        trout_count = trout_count + 1
      }else if(df[i,j] == "Eagle"){
        eagle_count = eagle_count + 1
      }else if(df[i,j] == "Rabbit"){
        rabbit_count = rabbit_count + 1
      }else if(df[i,j] == "Dragonfly"){
        dragonfly_count = dragonfly_count + 1
      }else if(df[i,j] == "Fox"){
        fox_count = fox_count + 1
      }else if(df[i,j] == "Deer"){
        deer_count = deer_count + 1
      }else if(df[i,j] == "Stream"){
        stream_count = stream_count + 1
      }else if(df[i,j] == "Wolf"){
        wolf_count = wolf_count + 1
      }else{
        print("bugged")
      }
      
    }
  }
  
  df_prop <- data.frame(
    name = c("bear", "bee", "meadow", "trout", "eagle", "rabbit",
               "dragonfly", "fox", "deer", "stream", "wolf"),
    proportion = c(bear_count/(nrow(df)*20),
                bee_count/(nrow(df)*20),
                meadow_count/(nrow(df)*20),
                trout_count/(nrow(df)*20),
                eagle_count/(nrow(df)*20),
                rabbit_count/(nrow(df)*20),
                dragonfly_count/(nrow(df)*20),
                fox_count/(nrow(df)*20),
                deer_count/(nrow(df)*20),
                stream_count/(nrow(df)*20),
                wolf_count/(nrow(df)*20)),
    true_prop = c(12/130,
                  8/130,
                  20/130,
                  10/130,
                  8/130,
                  8/130,
                  8/130,
                  12/130,
                  12/130,
                  20/130,
                  12/130)
  )
  
  return(df_prop)
}
Code
make_bars <- function(df, title){
  ggplot(aes(x = reorder(name, -proportion), y = proportion, fill = reorder(name, -proportion)), data=df) + 
    geom_bar(stat = "identity") +
    scale_fill_brewer(palette="PRGn", direction = -1) +
    labs(x = "card", title=title) +
    theme(legend.position = "none")
}
Code
exact_card_count <- function(Xcluster=NULL, cluster_num=NULL, database){

  bear_exact <- rep(0, 13)
  bee_exact <- rep(0, 9)
  meadow_exact <- rep(0, 21)
  trout_exact <- rep(0, 11)
  eagle_exact <- rep(0, 9)
  rabbit_exact <- rep(0, 9)
  dragonfly_exact <- rep(0, 9)
  fox_exact <- rep(0, 13)
  deer_exact <- rep(0, 13)
  stream_exact <- rep(0, 21)
  wolf_exact <- rep(0, 13)
  
  if(is.null(Xcluster) && is.null(cluster_num)){
    df <- database %>%
      filter(pool == "default")
  }else{
    df <- database %>%
    filter(
      case_when(
        Xcluster == 7 ~ X7cluster == cluster_num,
        Xcluster == 6 ~ X6cluster == cluster_num,
        Xcluster == 5 ~ X5cluster == cluster_num,
        Xcluster == 4 ~ X4cluster == cluster_num,
        Xcluster == 3 ~ X3cluster == cluster_num
      )
    )
  }
  
  
  for(i in 1:nrow(df)){
    bear_count <- 0
    bee_count <- 0
    meadow_count <- 0
    trout_count <- 0
    eagle_count <- 0
    rabbit_count <- 0
    dragonfly_count <- 0
    fox_count <- 0
    deer_count <- 0
    stream_count <- 0
    wolf_count <- 0
    
    for(j in 2:21){
      if(df[i,j] == "Bear"){
        bear_count = bear_count + 1
      }else if(df[i,j] == "Bee"){
        bee_count = bee_count + 1
      }else if(df[i,j] == "Meadow"){
        meadow_count = meadow_count + 1
      }else if(df[i,j] == "Trout"){
        trout_count = trout_count + 1
      }else if(df[i,j] == "Eagle"){
        eagle_count = eagle_count + 1
      }else if(df[i,j] == "Rabbit"){
        rabbit_count = rabbit_count + 1
      }else if(df[i,j] == "Dragonfly"){
        dragonfly_count = dragonfly_count + 1
      }else if(df[i,j] == "Fox"){
        fox_count = fox_count + 1
      }else if(df[i,j] == "Deer"){
        deer_count = deer_count + 1
      }else if(df[i,j] == "Stream"){
        stream_count = stream_count + 1
      }else if(df[i,j] == "Wolf"){
        wolf_count = wolf_count + 1
      }else{
        print("bugged")
      }
    }
    
    bear_exact[bear_count+1] = bear_exact[bear_count+1] + 1
    bee_exact[bee_count+1] = bee_exact[bee_count+1] + 1
    meadow_exact[meadow_count+1] = meadow_exact[meadow_count+1] + 1
    trout_exact[trout_count+1] = trout_exact[trout_count+1] + 1
    eagle_exact[eagle_count+1] = eagle_exact[eagle_count+1] + 1
    rabbit_exact[rabbit_count+1] = rabbit_exact[rabbit_count+1] + 1
    dragonfly_exact[dragonfly_count+1] = dragonfly_exact[dragonfly_count+1] + 1
    fox_exact[fox_count+1] = fox_exact[fox_count+1] + 1
    deer_exact[deer_count+1] = deer_exact[deer_count+1] + 1
    stream_exact[stream_count+1] = stream_exact[stream_count+1] + 1
    wolf_exact[wolf_count+1] = wolf_exact[wolf_count+1] + 1
    
  }
  
  bear_exact[8] = bear_exact[8]+bear_exact[9]+bear_exact[10]+
    bear_exact[11]+bear_exact[12]+bear_exact[13]
  
  bee_exact[8] = bee_exact[8]+bee_exact[9]
  
  meadow_exact[8] = meadow_exact[8]+meadow_exact[9]+meadow_exact[10]+
    meadow_exact[11]+meadow_exact[12]+meadow_exact[13]+meadow_exact[14]+
    meadow_exact[15]+meadow_exact[16]+meadow_exact[17]+meadow_exact[18]+
    meadow_exact[19]+meadow_exact[20]+meadow_exact[21]
  
  trout_exact[8] = trout_exact[8]+trout_exact[9]+trout_exact[10]+
    trout_exact[11]
  
  eagle_exact[8] = eagle_exact[8]+eagle_exact[9]
  
  rabbit_exact[8] = rabbit_exact[8]+rabbit_exact[9]
  
  dragonfly_exact[8] = dragonfly_exact[8]+dragonfly_exact[9]
  
  fox_exact[8] = fox_exact[8]+fox_exact[9]+fox_exact[10]+
    fox_exact[11]+fox_exact[12]+fox_exact[13]
  
  deer_exact[8] = deer_exact[8]+deer_exact[9]+deer_exact[10]+
    deer_exact[11]+deer_exact[12]+deer_exact[13]
  
  stream_exact[8] = stream_exact[8]+stream_exact[9]+stream_exact[10]+
    stream_exact[11]+stream_exact[12]+stream_exact[13]+stream_exact[14]+
    stream_exact[15]+stream_exact[16]+stream_exact[17]+stream_exact[18]+
    stream_exact[19]+stream_exact[20]+stream_exact[21]
  
  wolf_exact[8] = wolf_exact[8]+wolf_exact[9]+wolf_exact[10]+
    wolf_exact[11]+wolf_exact[12]+wolf_exact[13]
  
  bear_exact = bear_exact[1:8]
  bee_exact = bee_exact[1:8]
  meadow_exact = meadow_exact[1:8]
  trout_exact = trout_exact[1:8]
  eagle_exact = eagle_exact[1:8]
  rabbit_exact = rabbit_exact[1:8]
  dragonfly_exact = dragonfly_exact[1:8]
  fox_exact = fox_exact[1:8]
  deer_exact = deer_exact[1:8]
  stream_exact = stream_exact[1:8]
  wolf_exact = wolf_exact[1:8]
  
  result <- data.frame(
    card = c(rep("Bear", 8), 
             rep("Bee", 8),
             rep("Meadow", 8),
             rep("Trout", 8),
             rep("Eagle", 8),
             rep("Rabbit", 8),
             rep("Dragonfly", 8),
             rep("Fox", 8),
             rep("Deer", 8),
             rep("Stream", 8),
             rep("Wolf", 8)
             ),
    num_exact = c(seq(0,7),
                  seq(0,7),
                  seq(0,7),
                  seq(0,7),
                  seq(0,7),
                  seq(0,7),
                  seq(0,7),
                  seq(0,7),
                  seq(0,7),
                  seq(0,7),
                  seq(0,7)
                  ),
    proportion = c(bear_exact/(nrow(df)),
              bee_exact/(nrow(df)),
              meadow_exact/(nrow(df)),
              trout_exact/(nrow(df)),
              eagle_exact/(nrow(df)),
              rabbit_exact/(nrow(df)),
              dragonfly_exact/(nrow(df)),
              fox_exact/(nrow(df)),
              deer_exact/(nrow(df)),
              stream_exact/(nrow(df)),
              wolf_exact/(nrow(df))
              )
    
  )
  
  return(result)
}
Code
make_exact_bars <- function(df, title){
  ggplot(aes(x = factor(num_exact),
           y = proportion,
           fill = factor(num_exact)),
         data =
           (df %>%
              filter(proportion != 0))
        ) +
  geom_bar(stat = "identity") +
  scale_fill_viridis_d() +
  facet_wrap(vars(card),scales = "free_x") +
  labs(x="Exact number of cards",
       fill="Exact number of cards",
       title=title)
}

7cluster Overall Proportions

Code
nrow(database_new %>% filter(pool == "default"))
[1] 30000
Code
default_pool_props <- exact_card_count(database=database_new)
Code
ggplot(aes(x = factor(num_exact),
           y = proportion,
           fill = factor(num_exact)),
         data =
           (default_pool_props %>%
              filter(proportion != 0))
        ) +
  geom_bar(stat = "identity") +
  scale_fill_viridis_d() +
  facet_wrap(vars(card)) +
  labs(x="Exact number of cards",
       fill="Exact number of cards",
       title="Overall proportions of the default pool")

Cluster 7

Summary statistics for cluster 7

Code
cluster_table(7, "Cluster_7", cluster_data)
  mean_score sd_score p25th median p75th count
1   72.38033 8.294532    66     71    77  3142

Proportion of each card type out of all the cards in the cluster

Code
make_bars(cluster_prop(7, "Cluster_7", cluster_data), "7Cluster: Cluster_7")

Distributions of the exact number of each card type out of all grids in the cluster

Code
make_exact_bars(exact_card_count(7, "Cluster_7", cluster_data), title="7Cluster: Cluster_7")

Cluster 6

Code
cluster_table(7, "Cluster_6", cluster_data)
  mean_score sd_score p25th median p75th count
1   73.23918 6.733394    68     73    78  5870
Code
make_bars(cluster_prop(7, "Cluster_6", cluster_data), "7Cluster: Cluster_6")

Code
make_exact_bars(exact_card_count(7, "Cluster_6", cluster_data), title="7Cluster: Cluster_6")

Cluster 3

Code
cluster_table(7, "Cluster_3", cluster_data)
  mean_score sd_score p25th median p75th count
1    70.8507 5.061589    67     71    75  6671
Code
make_bars(cluster_prop(7, "Cluster_3", cluster_data), "7Cluster: Cluster_3")

Code
make_exact_bars(exact_card_count(7, "Cluster_3", cluster_data), title="7Cluster: Cluster_3")

Cluster 4

Code
cluster_table(7, "Cluster_4", cluster_data)
  mean_score sd_score p25th median p75th count
1   69.97354  4.80268    66     70    73  5405
Code
make_bars(cluster_prop(7, "Cluster_4", cluster_data), "7Cluster: Cluster_4")

Code
make_exact_bars(exact_card_count(7, "Cluster_4", cluster_data), title="7Cluster: Cluster_4")

Cluster 2

Code
cluster_table(7, "Cluster_2", cluster_data)
  mean_score sd_score p25th median p75th count
1   67.85015 4.099993    65     67    71  4291
Code
make_bars(cluster_prop(7, "Cluster_2", cluster_data), "7Cluster: Cluster_2")

Code
make_exact_bars(exact_card_count(7, "Cluster_2", cluster_data), title="7Cluster: Cluster_2")

Cluster 1

Code
cluster_table(7, "Cluster_1", cluster_data)
  mean_score sd_score p25th median p75th count
1   68.84694 4.440481    65     68    72  4887
Code
make_bars(cluster_prop(7, "Cluster_1", cluster_data), "7Cluster: Cluster_1")

Code
make_exact_bars(exact_card_count(7, "Cluster_1", cluster_data), title="7Cluster: Cluster_1")

Cluster 5

Code
cluster_table(7, "Cluster_5", cluster_data)
  mean_score sd_score p25th median p75th count
1   67.80948  4.12117    64     67    71  5800
Code
make_bars(cluster_prop(7, "Cluster_5", cluster_data), "7Cluster: Cluster_5")

Code
make_exact_bars(exact_card_count(7, "Cluster_5", cluster_data), title="7Cluster: Cluster_5")

Additional Displays

Code
ggplot(aes(x=col, y=row, fill=proportion), data=(overall_position)) +
  geom_tile() +
  theme_minimal() +
  scale_fill_gradient(low="#F0F0F0", high="#006837") +
  facet_wrap(vars(card)) +
  labs(title="Positions across all 50,000")

Code
# all_cluster_prop <- rbind(cluster_prop(7, "Cluster_1", cluster_data) %>% mutate(cluster = "Cluster_1"),
#                           cluster_prop(7, "Cluster_2", cluster_data) %>% mutate(cluster = "Cluster_2"),
#                           cluster_prop(7, "Cluster_3", cluster_data) %>% mutate(cluster = "Cluster_3"),
#                           cluster_prop(7, "Cluster_4", cluster_data) %>% mutate(cluster = "Cluster_4"),
#                           cluster_prop(7, "Cluster_5", cluster_data) %>% mutate(cluster = "Cluster_5"),
#                           cluster_prop(7, "Cluster_6", cluster_data) %>% mutate(cluster = "Cluster_6"),
#                           cluster_prop(7, "Cluster_7", cluster_data) %>% mutate(cluster = "Cluster_7")
#                           )
Code
all_exact <- rbind(exact_card_count(7, "Cluster_1", cluster_data) %>% mutate(cluster = "Cluster_1"),
                   exact_card_count(7, "Cluster_2", cluster_data) %>% mutate(cluster = "Cluster_2"),
                   exact_card_count(7, "Cluster_3", cluster_data) %>% mutate(cluster = "Cluster_3"),
                   exact_card_count(7, "Cluster_4", cluster_data) %>% mutate(cluster = "Cluster_4"),
                   exact_card_count(7, "Cluster_5", cluster_data) %>% mutate(cluster = "Cluster_5"),
                   exact_card_count(7, "Cluster_6", cluster_data) %>% mutate(cluster = "Cluster_6"),
                   exact_card_count(7, "Cluster_7", cluster_data) %>% mutate(cluster = "Cluster_7")
                   )
Code
# write.csv(all_exact, here::here("all_exact.csv"), row.names = FALSE)
Code
# write.csv(all_cluster_prop, here::here("cluster_prop.csv"), row.names = FALSE)
Code
all_cluster_prop <- read.csv(here::here("cluster_prop.csv")) %>%
  mutate(cluster = factor(cluster, levels = paste0("Cluster_", 1:7)))
Code
all_exact <- read.csv(here::here("all_exact.csv")) 
Code
cluster_data %>%
  group_by(X7cluster) %>%
  summarize(mean_score=mean(score),
            sd_score=sd(score),
            p25th=quantile(score,probs=0.25),
            median=quantile(score,probs=0.5),
            p75th=quantile(score,probs=0.75),
            count=n(),
            .groups = 'drop') %>%
  arrange(desc(mean_score))
# A tibble: 7 × 7
  X7cluster mean_score sd_score p25th median p75th count
  <chr>          <dbl>    <dbl> <dbl>  <dbl> <dbl> <int>
1 Cluster_6       73.2     6.73    68     73    78  5870
2 Cluster_7       72.4     8.29    66     71    77  3142
3 Cluster_3       70.9     5.06    67     71    75  6671
4 Cluster_4       70.0     4.80    66     70    73  5405
5 Cluster_1       68.8     4.44    65     68    72  4887
6 Cluster_2       67.9     4.10    65     67    71  4291
7 Cluster_5       67.8     4.12    64     67    71  5800
Code
ggplot(aes(x = reorder(name, -true_prop), 
           y = proportion, 
           fill = reorder(name, -true_prop)), 
       data=all_cluster_prop) + 
  geom_bar(stat = "identity") +
  geom_point(aes(x = reorder(name, -true_prop), y= true_prop),
             color = "red",
             data=all_cluster_prop,
             show.legend = FALSE) +
  scale_fill_brewer(palette="PRGn", direction = -1) +
  labs(x = "card", fill="card type", title="Propotion of card types in each cluster") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
  facet_wrap(vars(cluster))

Code
ggplot(all_cluster_prop, aes(x = proportion, y = cluster, color = name)) +
  geom_point(
    size = 3,
    alpa = 0.7,
    position = position_jitter(height = 0.2, seed = 110)
  ) +
  scale_color_brewer(palette = "Paired") +
  labs(
    x = "Proportion", 
    y = "Cluster",
    color = "Card Type",
    title = "Card Proportions by Cluster"
  ) +
  theme_minimal() +
  theme(
    panel.grid.major.y = element_line(color = "grey90"),
    panel.grid.minor = element_blank(),
    legend.position = "right"
  )

Code
ggplot(aes(x = cluster,
           y = proportion,
           fill = factor(num_exact, levels = rev(0:7))),
         data = all_exact
        ) +
  geom_bar(position = "fill", stat = "identity") +
  scale_fill_viridis_d() +
  facet_wrap(vars(card)) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
  labs(x="Clusters",
       fill="Exact number of cards",
       title="Exact number of cards in each cluster")

Code
ggplot(all_exact, aes(x = num_exact, y = proportion, color = cluster)) +
  geom_line(size = 0.5) +
  facet_wrap(~ card) +
  scale_x_continuous(breaks = 0:7) +
  labs(x = "Exact Number of Cards",
       y = "Proportion",
       color = "Cluster",
       title = "Exact number of cards in each cluster") +
  theme_minimal() +
  theme(legend.position = "bottom",
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        strip.text = element_text(size = 10, face = "bold"),
        panel.spacing = unit(1, "lines"))

Code
ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position %>% filter(card == "Bear"))) +
  geom_tile() +
  theme_minimal() +
  scale_fill_gradient(low="#F0F0F0", high="#006837") +
  facet_wrap(vars(cluster)) +
  labs(title="Bear positions across clusters")

Code
ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position %>% filter(card == "Bee"))) +
  geom_tile() +
  theme_minimal() +
  scale_fill_gradient(low="#F0F0F0", high="#006837") +
  facet_wrap(vars(cluster)) +
  labs(title="Bee positions across clusters")

Code
ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position %>% filter(card == "Meadow"))) +
  geom_tile() +
  theme_minimal() +
  scale_fill_gradient(low="#F0F0F0", high="#006837") +
  facet_wrap(vars(cluster)) +
  labs(title="Meadow positions across clusters")

Code
ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position %>% filter(card == "Trout"))) +
  geom_tile() +
  theme_minimal() +
  scale_fill_gradient(low="#F0F0F0", high="#006837") +
  facet_wrap(vars(cluster)) +
  labs(title="Trout positions across clusters")

Code
ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position %>% filter(card == "Eagle"))) +
  geom_tile() +
  theme_minimal() +
  scale_fill_gradient(low="#F0F0F0", high="#006837") +
  facet_wrap(vars(cluster)) +
  labs(title="Eagle positions across clusters")

Code
ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position %>% filter(card == "Rabbit"))) +
  geom_tile() +
  theme_minimal() +
  scale_fill_gradient(low="#F0F0F0", high="#006837") +
  facet_wrap(vars(cluster)) +
  labs(title="Rabbit positions across clusters")

Code
ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position %>% filter(card == "Dragonfly"))) +
  geom_tile() +
  theme_minimal() +
  scale_fill_gradient(low="#F0F0F0", high="#006837") +
  facet_wrap(vars(cluster)) +
  labs(title="Dragonfly positions across clusters")

Code
ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position %>% filter(card == "Fox"))) +
  geom_tile() +
  theme_minimal() +
  scale_fill_gradient(low="#F0F0F0", high="#006837") +
  facet_wrap(vars(cluster)) +
  labs(title="Fox positions across clusters")

Code
ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position %>% filter(card == "Deer"))) +
  geom_tile() +
  theme_minimal() +
  scale_fill_gradient(low="#F0F0F0", high="#006837") +
  facet_wrap(vars(cluster)) +
  labs(title="Deer positions across clusters")

Code
ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position %>% filter(card == "Stream"))) +
  geom_tile() +
  theme_minimal() +
  scale_fill_gradient(low="#F0F0F0", high="#006837") +
  facet_wrap(vars(cluster)) +
  labs(title="Stream positions across clusters")

Code
ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position %>% filter(card == "Wolf"))) +
  geom_tile() +
  theme_minimal() +
  scale_fill_gradient(low="#F0F0F0", high="#006837") +
  facet_wrap(vars(cluster)) +
  labs(title="Wolf positions across clusters")

Unclustered

Code
db_pos = read_csv(here::here("database.csv"))
Code
neighbors = db_pos |>
  filter(score > 60) |>
  select(!pool) |>
  pivot_longer(!c(ID, score), names_to = "position", values_to = "card") |>
  mutate(row = substr(position, start = 4, stop = 4),
         col = substr(position, start = 8, stop = 8),
         row = as.numeric(row),
         col = as.numeric(col))
Code
# neighboring card on right
neighbors = neighbors |>
  left_join(neighbors |>
              mutate(col = col - 1) |>
              rename(right_neighbor = card) |>
              select(ID, row, col, right_neighbor),
            join_by(ID, row, col))

# neighboring card on left
neighbors = neighbors |>
  left_join(neighbors |>
              mutate(col = col + 1) |>
              rename(left_neighbor = card) |>
              select(ID, row, col, left_neighbor),
            join_by(ID, row, col))

# neighboring card up
neighbors = neighbors |>
  left_join(neighbors |>
              mutate(row = row - 1) |>
              rename(up_neighbor = card) |>
              select(ID, row, col, up_neighbor),
            join_by(ID, row, col))

# neighboring card down
neighbors = neighbors |>
  left_join(neighbors |>
              mutate(row = row + 1) |>
              rename(down_neighbor = card) |>
              select(ID, row, col, down_neighbor),
            join_by(ID, row, col))
Code
neighbors |> head(10)
# A tibble: 10 × 10
      ID score position card        row   col right_neighbor left_neighbor
   <dbl> <dbl> <chr>    <chr>     <dbl> <dbl> <chr>          <chr>        
 1     1    67 row1col1 Meadow        1     1 Meadow         <NA>         
 2     1    67 row1col2 Meadow        1     2 Deer           Meadow       
 3     1    67 row1col3 Deer          1     3 Dragonfly      Meadow       
 4     1    67 row1col4 Dragonfly     1     4 Fox            Deer         
 5     1    67 row1col5 Fox           1     5 <NA>           Dragonfly    
 6     1    67 row2col1 Meadow        2     1 Bee            <NA>         
 7     1    67 row2col2 Bee           2     2 Bear           Meadow       
 8     1    67 row2col3 Bear          2     3 Trout          Bee          
 9     1    67 row2col4 Trout         2     4 Deer           Bear         
10     1    67 row2col5 Deer          2     5 <NA>           Trout        
# ℹ 2 more variables: up_neighbor <chr>, down_neighbor <chr>
Code
neighbors_long = neighbors |>
  pivot_longer(!c(ID, score, position, row, col, card),
               names_to = "neighbor",
               values_to = "neighbor_card")
Code
neighbors_long |> head(10)
# A tibble: 10 × 8
      ID score position card     row   col neighbor       neighbor_card
   <dbl> <dbl> <chr>    <chr>  <dbl> <dbl> <chr>          <chr>        
 1     1    67 row1col1 Meadow     1     1 right_neighbor Meadow       
 2     1    67 row1col1 Meadow     1     1 left_neighbor  <NA>         
 3     1    67 row1col1 Meadow     1     1 up_neighbor    Meadow       
 4     1    67 row1col1 Meadow     1     1 down_neighbor  <NA>         
 5     1    67 row1col2 Meadow     1     2 right_neighbor Deer         
 6     1    67 row1col2 Meadow     1     2 left_neighbor  Meadow       
 7     1    67 row1col2 Meadow     1     2 up_neighbor    Bee          
 8     1    67 row1col2 Meadow     1     2 down_neighbor  <NA>         
 9     1    67 row1col3 Deer       1     3 right_neighbor Dragonfly    
10     1    67 row1col3 Deer       1     3 left_neighbor  Meadow       
Code
neighbors_sum = neighbors_long |>
  filter(!is.na(neighbor_card)) |>
  group_by(card) |>
  count(neighbor_card) |>
  mutate(proportion = n / sum(n))
Code
ggplot(neighbors_sum,
       aes(x = card,
           y = neighbor_card,
           fill = proportion)) +
  geom_tile() + 
  scale_fill_distiller(palette = "Greens", direction = 1)

Code
ggplot(neighbors_sum,
       aes(x = card,
           fill = neighbor_card,
           y = proportion)) +
  geom_bar(position = "fill", stat = "identity") +
  scale_fill_viridis_d()

Code
ggplot(neighbors_sum,
       aes(x = neighbor_card,
           y = proportion,
           fill = neighbor_card)) +
  geom_bar(stat = "identity") +
  scale_fill_viridis_d() +
  facet_wrap(vars(card))

Clustered

Code
db_cluster = read_csv(here::here("normalized_clusters.csv"))
Code
neighbors_cluster = db_cluster |>
  pivot_longer(!c(ID, pool, score, contains("cluster")),
               names_to = "position", values_to = "card") |>
  mutate(row = substr(position, start = 4, stop = 4),
         col = substr(position, start = 8, stop = 8),
         row = as.numeric(row),
         col = as.numeric(col))
Code
# neighboring card on right
neighbors_cluster = neighbors_cluster |>
  left_join(neighbors_cluster |>
              mutate(col = col - 1) |>
              rename(right_neighbor = card) |>
              select(ID, row, col, right_neighbor),
            join_by(ID, row, col))

# neighboring card on left
neighbors_cluster = neighbors_cluster |>
  left_join(neighbors_cluster |>
              mutate(col = col + 1) |>
              rename(left_neighbor = card) |>
              select(ID, row, col, left_neighbor),
            join_by(ID, row, col))

# neighboring card up
neighbors_cluster = neighbors_cluster |>
  left_join(neighbors_cluster |>
              mutate(row = row - 1) |>
              rename(up_neighbor = card) |>
              select(ID, row, col, up_neighbor),
            join_by(ID, row, col))

# neighboring card down
neighbors_cluster = neighbors_cluster |>
  left_join(neighbors_cluster |>
              mutate(row = row + 1) |>
              rename(down_neighbor = card) |>
              select(ID, row, col, down_neighbor),
            join_by(ID, row, col))
Code
neighbors_cluster |> head(10)
# A tibble: 10 × 16
      ID score pool    `7cluster` `6cluster` `5cluster` `4cluster` `3cluster`
   <dbl> <dbl> <chr>   <chr>      <chr>      <chr>      <chr>      <chr>     
 1     1    67 default Cluster_1  Cluster_1  Cluster_1  Cluster_1  Cluster_1 
 2     1    67 default Cluster_1  Cluster_1  Cluster_1  Cluster_1  Cluster_1 
 3     1    67 default Cluster_1  Cluster_1  Cluster_1  Cluster_1  Cluster_1 
 4     1    67 default Cluster_1  Cluster_1  Cluster_1  Cluster_1  Cluster_1 
 5     1    67 default Cluster_1  Cluster_1  Cluster_1  Cluster_1  Cluster_1 
 6     1    67 default Cluster_1  Cluster_1  Cluster_1  Cluster_1  Cluster_1 
 7     1    67 default Cluster_1  Cluster_1  Cluster_1  Cluster_1  Cluster_1 
 8     1    67 default Cluster_1  Cluster_1  Cluster_1  Cluster_1  Cluster_1 
 9     1    67 default Cluster_1  Cluster_1  Cluster_1  Cluster_1  Cluster_1 
10     1    67 default Cluster_1  Cluster_1  Cluster_1  Cluster_1  Cluster_1 
# ℹ 8 more variables: position <chr>, card <chr>, row <dbl>, col <dbl>,
#   right_neighbor <chr>, left_neighbor <chr>, up_neighbor <chr>,
#   down_neighbor <chr>
Code
neighbors_cluster_long = neighbors_cluster |>
  pivot_longer(!c(ID, pool, contains("cluster"), score, position, row, col, card),
               names_to = "neighbor",
               values_to = "neighbor_card")
Code
neighbors_cluster_long |> head(10)
# A tibble: 10 × 14
      ID score pool    `7cluster` `6cluster` `5cluster` `4cluster` `3cluster`
   <dbl> <dbl> <chr>   <chr>      <chr>      <chr>      <chr>      <chr>     
 1     1    67 default Cluster_1  Cluster_1  Cluster_1  Cluster_1  Cluster_1 
 2     1    67 default Cluster_1  Cluster_1  Cluster_1  Cluster_1  Cluster_1 
 3     1    67 default Cluster_1  Cluster_1  Cluster_1  Cluster_1  Cluster_1 
 4     1    67 default Cluster_1  Cluster_1  Cluster_1  Cluster_1  Cluster_1 
 5     1    67 default Cluster_1  Cluster_1  Cluster_1  Cluster_1  Cluster_1 
 6     1    67 default Cluster_1  Cluster_1  Cluster_1  Cluster_1  Cluster_1 
 7     1    67 default Cluster_1  Cluster_1  Cluster_1  Cluster_1  Cluster_1 
 8     1    67 default Cluster_1  Cluster_1  Cluster_1  Cluster_1  Cluster_1 
 9     1    67 default Cluster_1  Cluster_1  Cluster_1  Cluster_1  Cluster_1 
10     1    67 default Cluster_1  Cluster_1  Cluster_1  Cluster_1  Cluster_1 
# ℹ 6 more variables: position <chr>, card <chr>, row <dbl>, col <dbl>,
#   neighbor <chr>, neighbor_card <chr>

Additional Clustered neighbor

Code
neighbors_sum2 = neighbors_cluster_long |>
  filter(!is.na(neighbor_card)) |>
  group_by(card, `7cluster`) |>
  count(neighbor_card) |>
  mutate(proportion = n / sum(n))
Code
ggplot(neighbors_sum2,
       aes(x = `7cluster`,
           fill = neighbor_card,
           y = proportion)) +
  geom_bar(position = "fill", stat = "identity") +
  scale_fill_viridis_d() +
  facet_wrap(vars(card)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8))

Code
ggplot(neighbors_sum2,
       aes(x = card,
           y = neighbor_card,
           fill = proportion)) +
  geom_tile() + 
  scale_fill_distiller(palette = "Greens", direction = 1) +
  facet_wrap(vars(`7cluster`)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8))

Code
ggplot(neighbors_sum2 %>% filter(card == "Bear"),
       aes(x = neighbor_card,
           y = proportion,
           fill = neighbor_card)) +
  geom_bar(stat = "identity") +
  scale_fill_viridis_d() +
  theme_minimal() +
  facet_wrap(vars(`7cluster`)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
  labs(title="Bear neighbors in each cluster")

Code
ggplot(neighbors_sum2 %>% filter(card == "Bee"),
       aes(x = neighbor_card,
           y = proportion,
           fill = neighbor_card)) +
  geom_bar(stat = "identity") +
  scale_fill_viridis_d() +
  theme_minimal() +
  facet_wrap(vars(`7cluster`)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
  labs(title="Bee neighbors in each cluster")

Code
ggplot(neighbors_sum2 %>% filter(card == "Meadow"),
       aes(x = neighbor_card,
           y = proportion,
           fill = neighbor_card)) +
  geom_bar(stat = "identity") +
  scale_fill_viridis_d() +
  theme_minimal() +
  facet_wrap(vars(`7cluster`)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
  labs(title="Meadow neighbors in each cluster")

Code
ggplot(neighbors_sum2 %>% filter(card == "Trout"),
       aes(x = neighbor_card,
           y = proportion,
           fill = neighbor_card)) +
  geom_bar(stat = "identity") +
  scale_fill_viridis_d() +
  theme_minimal() +
  facet_wrap(vars(`7cluster`)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
  labs(title="Trout neighbors in each cluster")

Code
ggplot(neighbors_sum2 %>% filter(card == "Eagle"),
       aes(x = neighbor_card,
           y = proportion,
           fill = neighbor_card)) +
  geom_bar(stat = "identity") +
  scale_fill_viridis_d() +
  theme_minimal() +
  facet_wrap(vars(`7cluster`)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
  labs(title="Eagle neighbors in each cluster")

Code
ggplot(neighbors_sum2 %>% filter(card == "Rabbit"),
       aes(x = neighbor_card,
           y = proportion,
           fill = neighbor_card)) +
  geom_bar(stat = "identity") +
  scale_fill_viridis_d() +
  theme_minimal() +
  facet_wrap(vars(`7cluster`)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
  labs(title="Rabbit neighbors in each cluster")

Code
ggplot(neighbors_sum2 %>% filter(card == "Dragonfly"),
       aes(x = neighbor_card,
           y = proportion,
           fill = neighbor_card)) +
  geom_bar(stat = "identity") +
  scale_fill_viridis_d() +
  theme_minimal() +
  facet_wrap(vars(`7cluster`)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
  labs(title="Dragonfly neighbors in each cluster")

Code
ggplot(neighbors_sum2 %>% filter(card == "Fox"),
       aes(x = neighbor_card,
           y = proportion,
           fill = neighbor_card)) +
  geom_bar(stat = "identity") +
  scale_fill_viridis_d() +
  theme_minimal() +
  facet_wrap(vars(`7cluster`)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
  labs(title="Fox neighbors in each cluster")

Code
ggplot(neighbors_sum2 %>% filter(card == "Deer"),
       aes(x = neighbor_card,
           y = proportion,
           fill = neighbor_card)) +
  geom_bar(stat = "identity") +
  scale_fill_viridis_d() +
  theme_minimal() +
  facet_wrap(vars(`7cluster`)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
  labs(title="Deer neighbors in each cluster")

Code
ggplot(neighbors_sum2 %>% filter(card == "Stream"),
       aes(x = neighbor_card,
           y = proportion,
           fill = neighbor_card)) +
  geom_bar(stat = "identity") +
  scale_fill_viridis_d() +
  theme_minimal() +
  facet_wrap(vars(`7cluster`)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
  labs(title="Stream neighbors in each cluster")

Code
ggplot(neighbors_sum2 %>% filter(card == "Wolf"),
       aes(x = neighbor_card,
           y = proportion,
           fill = neighbor_card)) +
  geom_bar(stat = "identity") +
  scale_fill_viridis_d() +
  theme_minimal() +
  facet_wrap(vars(`7cluster`)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
  labs(title="Wolf neighbors in each cluster")

Code
db_individual_removed <- read.csv(here::here("db_individual_lowest_removed.csv"))
Code
cluster_merge <- merge(x = cluster_data, y = db_individual_removed, by = "ID")
Code
ggplot(aes(x = score), data = cluster_merge) +
  geom_histogram(binwidth = 5, fill = "green3") +
  theme_minimal() +
  facet_wrap(vars(X7cluster)) +
  labs(title="Score distribution by cluster")

Code
ggplot(cluster_merge, aes(x = score, color = factor(X7cluster))) +
  geom_density(size = 1.2) +
  labs(x = "Score", y = "Density", color = "Cluster", title="Score distribution by cluster") +
  theme_minimal()

Code
low_score = database_new %>% anti_join(cluster_merge, by = "ID")
Code
ggplot(cluster_merge, aes(x = score, color = factor(X7cluster))) +
  geom_density(size = 1.2) +
  geom_density(aes(x=score, color = "All grids"), size = 1.2, data=database_new) +
  labs(x = "Score", y = "Density", color = "Cluster", title="Score distribution by cluster") +
  theme_minimal()

Code
# write.csv(cluster_merge, here::here("cluster_merge.csv"), row.names = FALSE)
Code
# write.csv(neighbors_cluster_long, here::here("neighbor-cluster.csv"), row.names = FALSE)

Testings

Code
two_player <- list(c(37,4,2,2), c(45,3,3,3))
four_player <- list(c(37,5,1,2), c(45,2,4,3), c(60,0,0,0), c(47,4,3,4))
five_player <- list(c(37,3,0,2), c(45,2,4,3), c(60,3,0,0), c(47,2,0,4), c(38,1,4,4))
Code
mp_score(two_player)
[1] 37 45
[1] 45 50
[1] 53 62
[1] 65 69
Code
mp_score(four_player)
[1] 37 45 60 47
[1] 45 45 60 52
[1] 49 57 60 60
[1] 61 60 72 63
Code
mp_score(five_player)
[1] 37 45 60 47 38
[1] 45 45 68 47 38
[1] 45 57 68 47 50
[1] 52 60 80 42 50
Code
z <- score_grid(sample_grid)
z
[1] 29  0  0  6
Code
z2 <- score_grid(sample_grid2)
z2
[1] 36  0  0  7
Code
z3 <- score_grid(big_grid1)
z3
[1] 46  3  1  0
Code
set.seed(48)
test_grid <- generate_grid(cards)
test_grid
     [,1]        [,2]        [,3]     [,4]     [,5]    
[1,] "Dragonfly" "Dragonfly" "Stream" "Meadow" "Fox"   
[2,] "Bear"      "Trout"     "Meadow" "Fox"    "Eagle" 
[3,] "Trout"     "Rabbit"    "Stream" "Bee"    "Stream"
[4,] "Wolf"      "Rabbit"    "Deer"   "Meadow" "Bee"   
Code
x <- find_cardinals(0,3,test_grid)
x
[[1]]
[1] 1 3

[[2]]
[1] 0 4

[[3]]
[1] 0 2
Code
c <- sample(x, 1)
c
[[1]]
[1] 1 3
Code
"el"
[1] "el"
Code
c[[1]][1]
[1] 1
Code
score_grid(test_grid)
[1] 26  1  1  2
Code
solo_score(score_grid(test_grid))
[1] 40
Code
set.seed(48)
baseline_scores <- baseline_sim(cards)
Code
mean(baseline_scores)
[1] 29.0535
Code
sd(baseline_scores)
[1] 9.467661
Code
var(baseline_scores)
[1] 89.6366
Code
max(baseline_scores)
[1] 64
Code
min(baseline_scores)
[1] 2
Code
summary(baseline_scores)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   2.00   23.00   29.00   29.05   35.25   64.00 
Code
baseline_data <- data.frame(baseline_scores)
ggplot(aes(x = baseline_scores), data = baseline_data) +
  geom_histogram(binwidth = 5, fill = "steelblue")